From fe2a5d56bf2e22b85b0ac85252dd1d3c47b5f43a Mon Sep 17 00:00:00 2001 From: Tobias Specht Date: Tue, 7 Nov 2023 15:03:46 +0100 Subject: [PATCH] Export cpg as json via Neo4j OGM (#1215) * Export cpg as json via Neo4j OGM * Update Readme for cpg-neo4j --------- Co-authored-by: Konrad Weiss --- cpg-neo4j/README.md | 96 +++++++++---- .../aisec/cpg_vis_neo4j/Application.kt | 132 +++++++++++++++++- .../aisec/cpg_vis_neo4j/ApplicationTest.kt | 57 +++++++- 3 files changed, 246 insertions(+), 39 deletions(-) diff --git a/cpg-neo4j/README.md b/cpg-neo4j/README.md index eb1870d3b7..b379d89493 100644 --- a/cpg-neo4j/README.md +++ b/cpg-neo4j/README.md @@ -4,7 +4,7 @@ A simple tool to export a *code property graph* to a neo4j database. ## Requirements -The application requires Java 11 or higher. +The application requires Java 17 or higher. ## Build @@ -14,57 +14,95 @@ Build (and install) a distribution using Gradle ../gradlew installDist ``` +Please remember to adjust the `gradle.properties` before building the project. + ## Usage ``` -./build/install/cpg-neo4j/bin/cpg-neo4j [--host=] [--port=] - [--user=] [--password=] - [--includes-file=] [--save-depth=] - [--softwareComponents =,...]* - ... - - ... The paths to analyze. If module support is +./build/install/cpg-neo4j/bin/cpg-neo4j [--infer-nodes] [--load-includes] [--no-default-passes] + [--no-neo4j] [--no-purge-db] [--print-benchmark] + [--use-unity-build] [--benchmark-json=] + [--custom-pass-list=] + [--export-json=] [--host=] + [--includes-file=] + [--password=] [--port=] + [--save-depth=] [--top-level=] + [--user=] ([...] | -S= + [-S=]... | + --json-compilation-database= | + --list-passes) + [...] The paths to analyze. If module support is enabled, the paths will be looked at if they contain modules - -S, --softwareComponents - Maps the names of software components to their - respective files. The files are separated by - commas (but no whitespace character is allowed). 
- Multiple such components can be specified by providing - more than one -S option. + --benchmark-json= + Save benchmark results to json file + --custom-pass-list= + Add custom list of passes (includes + --no-default-passes) which is passed as a + comma-separated list; give either pass name if + pass is in list, or its FQDN (e.g. + --custom-pass-list=DFGPass,CallResolver) + --export-json= + Export cpg as json --host= Set the host of the neo4j Database (default: localhost). - --load-includes Enable TranslationConfiguration option loadIncludes --includes-file= Load includes from file + --infer-nodes Create inferred nodes for missing declarations + --json-compilation-database= + The path to an optional JSON compilation database + --list-passes Prints the list of available passes + --load-includes Enable TranslationConfiguration option loadIncludes + --no-default-passes Do not register default passes [used for debugging] + --no-neo4j Do not push cpg into neo4j [used for debugging] + --no-purge-db Do not purge neo4j database before pushing the cpg --password= Neo4j password (default: password --port= Set the port of the neo4j Database (default: 7687). + --print-benchmark Print benchmark result as markdown table + -S, --softwareComponents= + Maps the names of software components to their + respective files. The files are separated by + commas (No whitespace!). + Example: -S App1=./file1.c,./file2.c -S App2=. + /Main.java,./Class.java --save-depth= Performance optimisation: Limit recursion depth form neo4j OGM when leaving the AST. -1 (default) means no limit is used. + --top-level= Set top level directory of project structure. + Default: Largest common path of all source files + --use-unity-build Enable unity build mode for C++ (requires + --load-includes) --user= Neo4j user name (default: neo4j) ``` You can provide a list of paths of arbitrary length that can contain both file paths and directory paths. 
+## Json export + +It is possible to export the cpg as a JSON file with the `--export-json` option. +The graph is serialized as a list of nodes and edges: +```json +{ + "nodes": [...], + "edges": [...] +} +``` +Documentation about the graph schema can be found at: +[https://fraunhofer-aisec.github.io/cpg/CPG/specs/graph](https://fraunhofer-aisec.github.io/cpg/CPG/specs/graph) + +Usage example: +``` +$ build/install/cpg-neo4j/bin/cpg-neo4j --export-json cpg-export.json --no-neo4j src/test/resources/client.cpp +``` + +To export the cpg from a neo4j database, you can use the neo4j `apoc` plugin. +There it's also possible to export only parts of the graph. + ## Known issues: - While importing sufficiently large projects with the parameter --save-depth=-1 a java.lang.StackOverflowError may occur. - This error could be solved by increasing the stack size with the JavaVM option: -Xss4m + - Otherwise the depth must be limited (e.g. 3 or 5) - While pushing a constant value larger than 2^63 - 1 a java.lang.IllegalArgumentException occurs. - -## Experimental Languages - -### Python - -Follow the instructions on the main README to install jep either globally or using a virtualenv. After building -the distribution (see above), use the following steps to analyse a python file. - -``` -cd ./build/install/cpg-neo4j -bin/cpg-neo4j --enable-experimental-python ../../../src/test/resources/test.py -``` - -It is important to change into the directory where the distribution is installed, otherwise the `cpg.py` will not be found. 
\ No newline at end of file diff --git a/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt b/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt index 022309fd2a..65e3dd417c 100644 --- a/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt +++ b/cpg-neo4j/src/main/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/Application.kt @@ -25,12 +25,12 @@ */ package de.fraunhofer.aisec.cpg_vis_neo4j +import com.fasterxml.jackson.databind.ObjectMapper import de.fraunhofer.aisec.cpg.* import de.fraunhofer.aisec.cpg.frontends.CompilationDatabase.Companion.fromFile import de.fraunhofer.aisec.cpg.helpers.Benchmark import de.fraunhofer.aisec.cpg.passes.* import java.io.File -import java.lang.Class import java.net.ConnectException import java.nio.file.Paths import java.util.concurrent.Callable @@ -38,7 +38,13 @@ import kotlin.reflect.KClass import kotlin.system.exitProcess import org.neo4j.driver.exceptions.AuthenticationException import org.neo4j.ogm.config.Configuration +import org.neo4j.ogm.context.EntityGraphMapper +import org.neo4j.ogm.context.MappingContext +import org.neo4j.ogm.cypher.compiler.MultiStatementCypherCompiler +import org.neo4j.ogm.cypher.compiler.builders.node.DefaultNodeBuilder +import org.neo4j.ogm.cypher.compiler.builders.node.DefaultRelationshipBuilder import org.neo4j.ogm.exception.ConnectionException +import org.neo4j.ogm.metadata.MetaData import org.neo4j.ogm.session.Session import org.neo4j.ogm.session.SessionFactory import org.slf4j.Logger @@ -62,6 +68,18 @@ private const val DEFAULT_USER_NAME = "neo4j" private const val DEFAULT_PASSWORD = "password" private const val DEFAULT_SAVE_DEPTH = -1 +data class JsonNode(val id: Long, val labels: Set, val properties: Map) + +data class JsonEdge( + val id: Long, + val type: String, + val startNode: Long, + val endNode: Long, + val properties: Map +) + +data class JsonGraph(val nodes: List, val edges: List) + /** * An application to export the cpg to a 
neo4j database. @@ -215,6 +233,9 @@ class Application : Callable { ) private var benchmarkJson: File? = null + @CommandLine.Option(names = ["--export-json"], description = ["Export cpg as json"]) + private var exportJsonFile: File? = null + private var passClassList = listOf( TypeHierarchyResolver::class, @@ -232,6 +253,107 @@ class Application : Callable { private val passList: List get() = passClassList.mapNotNull { it.simpleName } + private val packages: Array = + arrayOf("de.fraunhofer.aisec.cpg.graph", "de.fraunhofer.aisec.cpg.frontends") + + /** + * Create node and relationship builders to map the cpg via OGM. This method is not a public API + * of the OGM, thus we use reflection to access the related methods. + * + * @param translationResult, translationResult to map + */ + fun translateCPGToOGMBuilders( + translationResult: TranslationResult + ): Pair?, List?> { + val meta = MetaData(*packages) + val con = MappingContext(meta) + val entityGraphMapper = EntityGraphMapper(meta, con) + + translationResult.components.map { entityGraphMapper.map(it, depth) } + translationResult.additionalNodes.map { entityGraphMapper.map(it, depth) } + + val compiler = entityGraphMapper.compileContext().compiler + + // get private fields of `CypherCompiler` via reflection + val getNewNodeBuilders = + MultiStatementCypherCompiler::class.java.getDeclaredField("newNodeBuilders") + val getNewRelationshipBuilders = + MultiStatementCypherCompiler::class.java.getDeclaredField("newRelationshipBuilders") + getNewNodeBuilders.isAccessible = true + getNewRelationshipBuilders.isAccessible = true + + // We only need `newNodeBuilders` and `newRelationshipBuilders` as we are "importing" to an + // empty "db" and all nodes and relations will be new + val newNodeBuilders = + (getNewNodeBuilders[compiler] as? ArrayList<*>)?.filterIsInstance() + val newRelationshipBuilders = + (getNewRelationshipBuilders[compiler] as? 
ArrayList<*>)?.filterIsInstance< + DefaultRelationshipBuilder + >() + return newNodeBuilders to newRelationshipBuilders + } + + /** + * Use the provided node and relationship builders to create list of nodes and edges + * + * @param newNodeBuilders, input node builders + * @param newRelationshipBuilders, input relationship builders + */ + fun buildJsonGraph( + newNodeBuilders: List?, + newRelationshipBuilders: List? + ): JsonGraph { + // create simple json structure with flat list of nodes and edges + val nodes = + newNodeBuilders?.map { + val node = it.node() + JsonNode( + node.id, + node.labels.toSet(), + node.propertyList.associate { prop -> prop.key to prop.value } + ) + } + ?: emptyList() + val edges = + newRelationshipBuilders + // For some reason, there are edges without start or end node?? + ?.filter { it.edge().startNode != null } + ?.map { + val edge = it.edge() + JsonEdge( + edge.id, + edge.type, + edge.startNode, + edge.endNode, + edge.propertyList.associate { prop -> prop.key to prop.value } + ) + } + ?: emptyList() + + return JsonGraph(nodes, edges) + } + + /** + * Exports the TranslationResult to json. Serialization is done via the Neo4j OGM. + * + * @param translationResult, input translationResult, not null + * @param path, path to output json file + */ + fun exportToJson(translationResult: TranslationResult, path: File) { + val bench = Benchmark(this.javaClass, "Export cpg to json", false, translationResult) + log.info("Export graph to json using import depth: $depth") + + val (nodes, edges) = translateCPGToOGMBuilders(translationResult) + val graph = buildJsonGraph(nodes, edges) + val objectMapper = ObjectMapper() + objectMapper.writeValue(path, graph) + + log.info( + "Exported ${graph.nodes.size} Nodes and ${graph.edges.size} Edges to json file ${path.absoluteFile}" + ) + bench.addMeasurement() + } + /** * Pushes the whole translationResult to the neo4j db. 
* @@ -288,12 +410,7 @@ class Application : Callable { .credentials(neo4jUsername, neo4jPassword) .verifyConnection(VERIFY_CONNECTION) .build() - sessionFactory = - SessionFactory( - configuration, - "de.fraunhofer.aisec.cpg.graph", - "de.fraunhofer.aisec.cpg.frontends" - ) + sessionFactory = SessionFactory(configuration, *packages) session = sessionFactory.openSession() } catch (ex: ConnectionException) { @@ -447,6 +564,7 @@ class Application : Callable { "Benchmark: analyzing code in " + (analyzingTime - startTime) / S_TO_MS_FACTOR + " s." ) + exportJsonFile?.let { exportToJson(translationResult, it) } if (!noNeo4j) { pushToNeo4j(translationResult) } diff --git a/cpg-neo4j/src/test/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/ApplicationTest.kt b/cpg-neo4j/src/test/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/ApplicationTest.kt index b7e26cecee..9faabb3585 100644 --- a/cpg-neo4j/src/test/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/ApplicationTest.kt +++ b/cpg-neo4j/src/test/kotlin/de/fraunhofer/aisec/cpg_vis_neo4j/ApplicationTest.kt @@ -29,19 +29,21 @@ import de.fraunhofer.aisec.cpg.* import de.fraunhofer.aisec.cpg.graph.builder.* import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration import de.fraunhofer.aisec.cpg.graph.functions +import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression import de.fraunhofer.aisec.cpg.graph.types.* import java.nio.file.Paths +import kotlin.io.path.createTempFile +import kotlin.reflect.jvm.javaField import kotlin.test.Test import kotlin.test.assertEquals import kotlin.test.assertNotNull import org.junit.jupiter.api.Tag +import org.neo4j.ogm.annotation.Relationship import picocli.CommandLine @Tag("integration") class ApplicationTest { - @Test - @Throws(InterruptedException::class) - fun testPush() { + private fun createTranslationResult(): Pair { val topLevel = Paths.get("src").resolve("test").resolve("resources").toAbsolutePath() val path = topLevel.resolve("client.cpp").toAbsolutePath() @@ -52,6 +54,13 @@ 
class ApplicationTest { val translationConfiguration = application.setupTranslationConfiguration() val translationResult = TranslationManager.builder().config(translationConfiguration).build().analyze().get() + return application to translationResult + } + + @Test + @Throws(InterruptedException::class) + fun testPush() { + val (application, translationResult) = createTranslationResult() assertEquals(31, translationResult.functions.size) @@ -72,4 +81,46 @@ class ApplicationTest { session.clear() sessionAndSessionFactoryPair.second.close() } + + @Test + fun testSerializeCpgViaOGM() { + val (application, translationResult) = createTranslationResult() + + assertEquals(31, translationResult.functions.size) + + val (nodes, edges) = application.translateCPGToOGMBuilders(translationResult) + val graph = application.buildJsonGraph(nodes, edges) + val connectToFuncDel = + graph.nodes.firstOrNull { + it.labels.contains(FunctionDeclaration::class.simpleName) && + it.properties["name"] == "connectTo" + } + assertNotNull(connectToFuncDel) + + val connectToCallExpr = + graph.nodes.firstOrNull { + it.labels.contains(CallExpression::class.simpleName) && + it.properties["name"] == "connectTo" + } + assertNotNull(connectToCallExpr) + + val invokesEdge = + graph.edges.firstOrNull { + it.type == + (CallExpression::invokeEdges.javaField?.getAnnotation(Relationship::class.java)) + ?.value && + it.startNode == connectToCallExpr.id && + it.endNode == connectToFuncDel.id + } + assertNotNull(invokesEdge) + } + + @Test + fun testExportToJson() { + val (application, translationResult) = createTranslationResult() + assertEquals(31, translationResult.functions.size) + val path = createTempFile().toFile() + application.exportToJson(translationResult, path) + assert(path.length() > 0) + } }