From 07ae30875c1963fc490be0b89ddccee31f763b51 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 11 Jun 2024 09:36:04 +0200 Subject: [PATCH 1/5] features: add aarch64 arch (#2144) * features: add aarch64 arch --- CHANGELOG.md | 1 + capa/features/common.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index da9482f0d..beaa48caa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - render maec/* fields #843 @s-ff - replace Halo spinner with Rich #2086 @s-ff - optimize rule matching #2080 @williballenthin +- add aarch64 as a valid architecture #2144 mehunhoff@google.com @williballenthin ### Breaking Changes diff --git a/capa/features/common.py b/capa/features/common.py index c4b7df8e6..cb938f299 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -409,9 +409,10 @@ def get_value_str(self): # other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types ARCH_I386 = "i386" ARCH_AMD64 = "amd64" +ARCH_AARCH64 = "aarch64" # dotnet ARCH_ANY = "any" -VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY) +VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_AARCH64, ARCH_ANY) class Arch(Feature): From 833ec471706d04a6adfdaee16eddb9b86b2a5340 Mon Sep 17 00:00:00 2001 From: Willi Ballenthin Date: Tue, 11 Jun 2024 14:29:34 +0200 Subject: [PATCH 2/5] relax pyproject dependency versions and introduce requirements.txt (#2132) * relax pyproject dependency versions and introduce requirements.txt closes #2053 closes #2079 * pyproject: document dev/build profile dependency policies * changelog * doc: installation: describe requirements.txt usage * pyproject: don't use dnfile 0.15 yet --------- Co-authored-by: Moritz --- .github/workflows/build.yml | 4 +- .github/workflows/publish.yml | 1 + .github/workflows/tests.yml | 20 +++++-- CHANGELOG.md | 1 + doc/installation.md | 6 +++ pyproject.toml | 99 ++++++++++++++++++++++++++++------- requirements.txt | 46 ++++++++++++++++ 7 files 
changed, 152 insertions(+), 25 deletions(-) create mode 100644 requirements.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4a9bf555f..85b898b4d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -51,7 +51,9 @@ jobs: - name: Upgrade pip, setuptools run: python -m pip install --upgrade pip setuptools - name: Install capa with build requirements - run: pip install -e .[build] + run: | + pip install -r requirements.txt + pip install -e .[build] - name: Build standalone executable run: pyinstaller --log-level DEBUG .github/pyinstaller/pyinstaller.spec - name: Does it run (PE)? diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index cb2a00f97..4a591d778 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -25,6 +25,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + pip install -r requirements.txt pip install -e .[build] - name: build package run: | diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 812528564..5553ceae5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,7 +35,9 @@ jobs: with: python-version: "3.11" - name: Install dependencies - run: pip install -e .[dev] + run: | + pip install -r requirements.txt + pip install -e .[dev] - name: Lint with ruff run: pre-commit run ruff - name: Lint with isort @@ -61,7 +63,9 @@ jobs: with: python-version: "3.11" - name: Install capa - run: pip install -e .[dev] + run: | + pip install -r requirements.txt + pip install -e .[dev] - name: Run rule linter run: python scripts/lint.py rules/ @@ -96,7 +100,9 @@ jobs: if: matrix.os == 'ubuntu-20.04' run: sudo apt-get install -y libyaml-dev - name: Install capa - run: pip install -e .[dev] + run: | + pip install -r requirements.txt + pip install -e .[dev] - name: Run tests (fast) # this set of tests runs about 80% of the cases in 20% of the time, # and should catch most errors 
quickly. @@ -131,7 +137,9 @@ jobs: run: sudo apt-get install -y libyaml-dev - name: Install capa if: ${{ env.BN_SERIAL != 0 }} - run: pip install -e .[dev] + run: | + pip install -r requirements.txt + pip install -e .[dev] - name: install Binary Ninja if: ${{ env.BN_SERIAL != 0 }} run: | @@ -188,7 +196,9 @@ jobs: - name: Install pyyaml run: sudo apt-get install -y libyaml-dev - name: Install capa - run: pip install -e .[dev] + run: | + pip install -r requirements.txt + pip install -e .[dev] - name: Run tests run: | mkdir ./.github/ghidra/project diff --git a/CHANGELOG.md b/CHANGELOG.md index beaa48caa..826fc78c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - replace Halo spinner with Rich #2086 @s-ff - optimize rule matching #2080 @williballenthin - add aarch64 as a valid architecture #2144 mehunhoff@google.com @williballenthin +- relax dependency version requirements for the capa library #2053 @williballenthin ### Breaking Changes diff --git a/doc/installation.md b/doc/installation.md index 57c939c2b..93df732c2 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -91,6 +91,12 @@ For more details about creating and using virtual environments, check out the [v ##### Install development dependencies +When developing capa, please use the pinned dependencies found in `requirements.txt`. +This ensures that everyone has the exact same, reproducible environment. 
+Please install these dependencies before install capa (from source or from PyPI): + +`$ pip install -r requirements.txt` + We use the following tools to ensure consistent code style and formatting: - [black](https://github.com/psf/black) code formatter - [isort](https://pypi.org/project/isort/) code formatter diff --git a/pyproject.toml b/pyproject.toml index 714a567b3..268950764 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,25 +32,78 @@ classifiers = [ "Topic :: Security", ] dependencies = [ - "tqdm==4.66.4", - "pyyaml==6.0.1", - "tabulate==0.9.0", - "colorama==0.4.6", - "termcolor==2.4.0", - "wcwidth==0.2.13", - "ida-settings==2.1.0", - "viv-utils[flirt]==0.7.9", - "networkx==3.1", - "ruamel.yaml==0.18.6", - "vivisect==1.1.1", - "pefile==2023.2.7", - "pyelftools==0.31", - "dnfile==0.14.1", - "dncil==1.0.2", - "pydantic==2.7.1", - "rich==13.7.1", - "humanize==4.9.0", - "protobuf==5.27.0", + # --------------------------------------- + # As a library, capa uses lower version bounds + # when specifying its dependencies. This lets + # other programs that use capa (and other libraries) + # to find a compatible set of dependency versions. + # + # We can optionally pin to specific versions or + # limit the upper bound when there's a good reason; + # but the default is to assume all greater versions + # probably work with capa until proven otherwise. + # + # The following link provides good background: + # https://iscinumpy.dev/post/bound-version-constraints/ + # + # When we develop capa, and when we distribute it as + # a standalone binary, we'll use specific versions + # that are pinned in requirements.txt. + # But the requirements for a library are specified here + # and are looser. 
+ # + # Related discussions: + # + # - https://github.com/mandiant/capa/issues/2053 + # - https://github.com/mandiant/capa/pull/2059 + # - https://github.com/mandiant/capa/pull/2079 + # + # --------------------------------------- + # The following dependency versions were imported + # during June 2024 by truncating specific versions to + # their major-most version (major version when possible, + # or minor otherwise). + # As specific constraints are identified, please provide + # comments and context. + "tqdm>=4", + "pyyaml>=6", + "tabulate>=0.9", + "colorama>=0.4", + "termcolor>=2", + "wcwidth>=0.2", + "ida-settings>=2", + "ruamel.yaml>=0.18", + "pefile>=2023.2.7", + "pyelftools>=0.31", + "pydantic>=2", + "rich>=13", + "humanize>=4", + "protobuf>=5", + + # --------------------------------------- + # Dependencies that we develop + # + # These dependencies are often actively influenced by capa, + # so we provide a minimum patch version that includes the + # latest bug fixes we need here. + "viv-utils[flirt]>=0.7.9", + "vivisect>=1.1.1", + "dncil>=1.0.2", + + # --------------------------------------- + # Dependencies with version caps + # + # These dependencies must not exceed the version cap, + # typically due to dropping support for python releases + # we still support. + + # TODO(williballenthin): networkx 3.2 doesn't support python 3.8 while capa does. + # https://github.com/mandiant/capa/issues/1966 + "networkx>=3,<3.2", + + # TODO(williballenthin): dnfile 0.15 changes UserString API and we havent updated yet. + # https://github.com/mandiant/capa/pull/2037 + "dnfile>=0.14.1,<0.15", ] dynamic = ["version"] @@ -63,6 +116,10 @@ namespaces = false [project.optional-dependencies] dev = [ + # Dev and build dependencies are not relaxed because + # we want all developer environments to be consistent. + # These dependencies are not used in production environments + # and should not conflict with other libraries/tooling. 
"pre-commit==3.5.0", "pytest==8.0.0", "pytest-sugar==1.0.0", @@ -99,6 +156,10 @@ dev = [ "deptry==0.16.1" ] build = [ + # Dev and build dependencies are not relaxed because + # we want all developer environments to be consistent. + # These dependencies are not used in production environments + # and should not conflict with other libraries/tooling. "pyinstaller==6.7.0", "setuptools==69.5.1", "build==1.2.1" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..004f98725 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,46 @@ +# Dependencies with specific version constraints +# used during development and building the standalone executables. +# For these environments, use `pip install -r requirements.txt` +# before installing capa from source/pypi. This will ensure +# the following specific versions are used. +# +# Initially generated via: pip freeze | grep -v -- "-e" +# Kept up to date by dependabot. +annotated-types==0.7.0 +colorama==0.4.6 +cxxfilt==0.2.2 +dncil==1.0.2 +dnfile==0.15.0 +funcy==2.0 +humanize==4.9.0 +ida-netnode==3.0 +ida-settings==2.1.0 +intervaltree==3.1.0 +markdown-it-py==3.0.0 +mdurl==0.1.2 +msgpack==1.0.8 +networkx==3.1 +pefile==2023.2.7 +pip==24.0 +protobuf==5.27.1 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.22 +pydantic==2.7.3 +pydantic-core==2.18.4 +pyelftools==0.31 +pygments==2.18.0 +python-flirt==0.8.6 +pyyaml==6.0.1 +rich==13.7.1 +ruamel-yaml==0.18.6 +ruamel-yaml-clib==0.2.8 +setuptools==65.5.0 +six==1.16.0 +sortedcontainers==2.4.0 +tabulate==0.9.0 +termcolor==2.4.0 +tqdm==4.66.4 +viv-utils==0.7.9 +vivisect==1.1.1 +wcwidth==0.2.13 From c97d2d7244b55666f57945b2ccb8fe4b351e1aab Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 11 Jun 2024 14:36:58 +0200 Subject: [PATCH 3/5] build(deps): bump pyinstaller from 6.7.0 to 6.8.0 (#2138) Bumps [pyinstaller](https://github.com/pyinstaller/pyinstaller) from 6.7.0 to 6.8.0. 
- [Release notes](https://github.com/pyinstaller/pyinstaller/releases) - [Changelog](https://github.com/pyinstaller/pyinstaller/blob/develop/doc/CHANGES.rst) - [Commits](https://github.com/pyinstaller/pyinstaller/compare/v6.7.0...v6.8.0) --- updated-dependencies: - dependency-name: pyinstaller dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 268950764..b9c361ff1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -160,7 +160,7 @@ build = [ # we want all developer environments to be consistent. # These dependencies are not used in production environments # and should not conflict with other libraries/tooling. - "pyinstaller==6.7.0", + "pyinstaller==6.8.0", "setuptools==69.5.1", "build==1.2.1" ] From 52e24e560bd5e7a7175768cd18744ec03c6002b7 Mon Sep 17 00:00:00 2001 From: ReWithMe Date: Tue, 11 Jun 2024 07:01:26 -0600 Subject: [PATCH 4/5] FEAT(capa2sarif) Add SARIF conversion script from json output (#2093) * feat(capa2sarif): add new sarif conversion script converting json output to sarif schema, update dependencies, and update changelog * fix(capa2sarif): removing copy and paste transcription errors * fix(capa2sarif): remove dependencies from pyproject toml to guarded import statements * chore(capa2sarif): adding node in readme specifying dependency and applied auto formatter for styling * style(capa2sarif): applied import sorting and fixed typo in invocations function * test(capa2sarif): adding simple test for capa to sarif conversion script using existing result document * style(capa2sarif): fixing typo in version string in usage * style(capa2sarif): isort failing due to reordering of typehint imports * style(capa2sarif): fixing import order as isort on local machine was not updating code --------- 
Co-authored-by: ReversingWithMe Co-authored-by: Willi Ballenthin --- CHANGELOG.md | 1 + scripts/capa2sarif.py | 375 ++++++++++++++++++++++++++++++++++++++++++ tests/test_scripts.py | 1 + 3 files changed, 377 insertions(+) create mode 100644 scripts/capa2sarif.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 826fc78c8..00519da80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - add function in capa/helpers to load plain and compressed JSON reports #1883 @Rohit1123 - document Antivirus warnings and VirusTotal false positive detections #2028 @RionEV @mr-tz +- Add json to sarif conversion script @reversingwithme - render maec/* fields #843 @s-ff - replace Halo spinner with Rich #2086 @s-ff - optimize rule matching #2080 @williballenthin diff --git a/scripts/capa2sarif.py b/scripts/capa2sarif.py new file mode 100644 index 000000000..62f8e47ae --- /dev/null +++ b/scripts/capa2sarif.py @@ -0,0 +1,375 @@ +# Copyright (C) 2021 Mandiant, Inc. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: [package root]/LICENSE.txt +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and limitations under the License. 
+""" +Convert capa json output to sarif schema + usage: capa2sarif.py [-h] [-g] [-r] [-t TAG] [--version] capa_output + +Capa to SARIF analysis file +positional arguments: + capa_output Path to capa JSON output file +optional arguments: + -h, --help show this help message and exit + --version show program's version number and exit + -t TAG, --tag TAG filter on rule meta field values (ruleid) + +Requires: + - sarif_om 1.0.4 + - jschema_to_python 1.2.3 +""" +import sys +import json +import logging +import argparse +from typing import List, Optional +from pathlib import Path + +from capa.version import __version__ + +logger = logging.getLogger("capa2sarif") + +# Dependencies +try: + from sarif_om import Run, Tool, SarifLog, ToolComponent +except ImportError as e: + logger.error( + "Required import `sarif_om` is not installed. This is solved by installing `python3 -m pip install sarif_om>=1.0.4`. %s", + e, + ) + exit(-4) + +try: + from jschema_to_python.to_json import to_json +except ImportError as e: + logger.error( + "Required import `jschema_to_python` is not installed. 
This is solved by installing `python3 -m pip install jschema_to_python>=1.2.3`, %s", + e, + ) + exit(-4) + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Capa to SARIF analysis file") + + # Positional argument + parser.add_argument("capa_output", help="Path to capa JSON output file") + + # Optional arguments + parser.add_argument( + "-g", + "--ghidra-compat", + action="store_true", + help="Compatibility for Ghidra 11.0.X", + ) + parser.add_argument( + "-r", + "--radare-compat", + action="store_true", + help="Compatibility for Radare r2sarif plugin v2.0", + ) + parser.add_argument("-t", "--tag", help="Filter on rule meta field values (ruleid)") + parser.add_argument( + "--version", action="version", version=f"%(prog)s {__version__}" + ) + + return parser.parse_args() + + +def main() -> int: + logging.basicConfig(level=logging.INFO) + logging.getLogger().setLevel(logging.INFO) + + args = _parse_args() + + try: + with Path(args.capa_output).open() as capa_output: + json_data = json.load(capa_output) + except ValueError: + logger.error( + "Input data was not valid JSON, input should be a capa json output file." + ) + return -1 + except json.JSONDecodeError: + # An exception has occured + logger.error( + "Input data was not valid JSON, input should be a capa json output file." 
+ ) + return -2 + + # Marshall json into Sarif + # Create baseline sarif structure to be populated from json data + sarif_structure: Optional[dict] = _sarif_boilerplate( + json_data["meta"], json_data["rules"] + ) + if sarif_structure is None: + logger.error("An Error has occurred creating default sarif structure.") + return -3 + + _populate_artifact(sarif_structure, json_data["meta"]) + _populate_invocations(sarif_structure, json_data["meta"]) + _populate_results(sarif_structure, json_data["rules"], args.ghidra_compat) + + if args.ghidra_compat: + # Ghidra can't handle this structure as of 11.0.x + if "invocations" in sarif_structure["runs"][0]: + del sarif_structure["runs"][0]["invocations"] + + # artifacts must include a description as well with a text field. + if "artifacts" in sarif_structure["runs"][0]: + sarif_structure["runs"][0]["artifacts"][0]["description"] = { + "text": "placeholder" + } + + # For better compliance with Ghidra table. Iteration through properties['additionalProperties'] + """ + "additionalProperties": { + "to": "", + "offset": 0, + "primary": true, + "index": <>"", + "kind": "", + "opIndex": 0, + "sourceType": "" + } + """ + + if args.radare_compat: + # Add just enough for passing tests + _add_filler_optional(json_data, sarif_structure) + + print(json.dumps(sarif_structure, indent=4)) # noqa: T201 + return 0 + + +def _sarif_boilerplate(data_meta: dict, data_rules: dict) -> Optional[dict]: + # Only track rules that appear in this log, not full 1k + rules = [] + # Parse rules from parsed sarif structure + for key in data_rules: + # Use attack as default, if both exist then only use attack, if neither exist use the name of rule for ruleID + # this is not good practice to use long name for ruleID + attack_length = len(data_rules[key]["meta"]["attack"]) + mbc_length = len(data_rules[key]["meta"]["mbc"]) + if attack_length or mbc_length: + id = ( + data_rules[key]["meta"]["attack"][0]["id"] + if attack_length > 0 + else
data_rules[key]["meta"]["mbc"][0]["id"] + ) + else: + id = data_rules[key]["meta"]["name"] + + # Append current rule + rules.append( + { + # Default to attack identifier, fall back to MBC, mainly relevant if both are present + "id": id, + "name": data_rules[key]["meta"]["name"], + "shortDescription": {"text": data_rules[key]["meta"]["name"]}, + "messageStrings": { + "default": {"text": data_rules[key]["meta"]["name"]} + }, + "properties": { + "namespace": data_rules[key]["meta"]["namespace"] + if "namespace" in data_rules[key]["meta"] + else [], + "scopes": data_rules[key]["meta"]["scopes"], + "references": data_rules[key]["meta"]["references"], + "lib": data_rules[key]["meta"]["lib"], + }, + } + ) + + tool = Tool( + driver=ToolComponent( + name="Capa", + version=__version__, + information_uri="https://github.com/mandiant/capa", + rules=rules, + ) + ) + + # Create a SARIF Log object, populate with a single run + sarif_log = SarifLog( + version="2.1.0", + schema_uri="https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/sarif-schema-2.1.0.json", + runs=[Run(tool=tool, results=[], artifacts=[], invocations=[])], + ) + + # Convert the SARIF log to a dictionary and then to a JSON string + try: + sarif_outline = json.loads(to_json(sarif_log)) + except json.JSONDecodeError: + # An exception has occured + return None + + return sarif_outline + + +def _populate_artifact(sarif_log: dict, meta_data: dict) -> None: + """ + @param sarif_log: dict - sarif data structure including runs + @param meta_data: dict - Capa meta output + @returns None, updates sarif_log via side-effects + """ + sample = meta_data["sample"] + artifact = { + "location": {"uri": sample["path"]}, + "roles": ["analysisTarget"], + "hashes": { + "md5": sample["md5"], + "sha-1": sample["sha1"], + "sha-256": sample["sha256"], + }, + } + sarif_log["runs"][0]["artifacts"].append(artifact) + + +def _populate_invocations(sarif_log: dict, meta_data: dict) -> None: + """ + @param sarif_log: dict - sarif data 
structure including runs + @param meta_data: dict - Capa meta output + @returns None, updates sarif_log via side-effects + """ + analysis_time = meta_data["timestamp"] + argv = meta_data["argv"] + analysis = meta_data["analysis"] + invoke = { + "commandLine": "capa " + " ".join(argv), + "arguments": argv if len(argv) > 0 else [], + # Format in Zulu time, this may require a conversion from local timezone + "endTimeUtc": f"{analysis_time}Z", + "executionSuccessful": True, + "properties": { + "format": analysis["format"], + "arch": analysis["arch"], + "os": analysis["os"], + "extractor": analysis["extractor"], + "rule_location": analysis["rules"], + "base_address": analysis["base_address"], + }, + } + sarif_log["runs"][0]["invocations"].append(invoke) + + +def _enumerate_evidence(node: dict, related_count: int) -> List[dict]: + related_locations = [] + if node.get("success") and node.get("node").get("type") != "statement": + label = "" + if node.get("node").get("type") == "feature": + if node.get("node").get("feature").get("type") == "api": + label = "api: " + node.get("node").get("feature").get("api") + elif node.get("node").get("feature").get("type") == "match": + label = "match: " + node.get("node").get("feature").get("match") + elif node.get("node").get("feature").get("type") == "number": + label = f"number: {node.get('node').get('feature').get('description')} ({node.get('node').get('feature').get('number')})" + elif node.get("node").get("feature").get("type") == "offset": + label = f"offset: {node.get('node').get('feature').get('description')} ({node.get('node').get('feature').get('offset')})" + elif node.get("node").get("feature").get("type") == "mnemonic": + label = f"mnemonic: {node.get('node').get('feature').get('mnemonic')}" + elif node.get("node").get("feature").get("type") == "characteristic": + label = f"characteristic: {node.get('node').get('feature').get('characteristic')}" + elif node.get("node").get("feature").get("type") == "os": + label = f"os: 
{node.get('node').get('feature').get('os')}" + elif node.get("node").get("feature").get("type") == "operand number": + label = f"operand: ({node.get('node').get('feature').get('index')} ) {node.get('node').get('feature').get('description')} ({node.get('node').get('feature').get('operand_number')})" + else: + logger.error( + "Not implemented %s", + node.get("node").get("feature").get("type"), + file=sys.stderr, + ) + return [] + else: + logger.error( + "Not implemented %s", node.get("node").get("type"), file=sys.stderr + ) + return [] + + for loc in node.get("locations"): + if loc["type"] != "absolute": + continue + + related_locations.append( + { + "id": related_count, + "message": {"text": label}, + "physicalLocation": {"address": {"absoluteAddress": loc["value"]}}, + } + ) + related_count += 1 + + if node.get("success") and node.get("node").get("type") == "statement": + for child in node.get("children"): + related_locations += _enumerate_evidence(child, related_count) + + return related_locations + + +def _populate_results(sarif_log: dict, data_rules: dict, ghidra_compat: bool) -> None: + """ + @param sarif_log: dict - sarif data structure including runs + @param meta_data: dict - Capa meta output + @returns None, updates sarif_log via side-effects + """ + results = sarif_log["runs"][0]["results"] + + # Parse rules from parsed sarif structure + for key in data_rules: + # Use attack as default, if both exist then only use attack, if neither exist use the name of rule for ruleID + # this is not good practice to use long name for ruleID. 
+ attack_length = len(data_rules[key]["meta"]["attack"]) + mbc_length = len(data_rules[key]["meta"]["mbc"]) + if attack_length or mbc_length: + id = ( + data_rules[key]["meta"]["attack"][0]["id"] + if attack_length > 0 + else data_rules[key]["meta"]["mbc"][0]["id"] + ) + else: + id = data_rules[key]["meta"]["name"] + + for address, details in data_rules[key]["matches"]: + related_cnt = 0 + related_locations = _enumerate_evidence(details, related_cnt) + + res = { + "ruleId": id, + "level": "none" if not ghidra_compat else "NONE", + "message": {"text": data_rules[key]["meta"]["name"]}, + "kind": "informational" if not ghidra_compat else "INFORMATIONAL", + "locations": [ + { + "physicalLocation": { + "address": { + "absoluteAddress": address["value"], + } + }, + } + ], + } + if not ghidra_compat: + res["relatedLocations"] = related_locations + + results.append(res) + + +def _add_filler_optional(capa_result: dict, sarif_log: dict) -> None: + """Update sarif file with just enough fields to pass radare tests""" + base_address = capa_result["meta"]["analysis"]["base_address"]["value"] + # Assume there is only one run, and one binary artifact + artifact = sarif_log["runs"][0]["artifacts"][0] + if "properties" not in artifact: + artifact["properties"] = {} + if "additionalProperties" not in artifact["properties"]: + artifact["properties"]["additionalProperties"] = {} + if "imageBase" not in artifact["properties"]["additionalProperties"]: + artifact["properties"]["additionalProperties"]["imageBase"] = base_address + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 052b1c89b..f6f12fd68 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -40,6 +40,7 @@ def get_rule_path(): [ pytest.param("capa2yara.py", [get_rules_path()]), pytest.param("capafmt.py", [get_rule_path()]), + pytest.param("capa2sarif.py", [Path(__file__).resolve().parent / "data" / "rd" / "Practical Malware Analysis Lab 
01-01.dll_.json"]), # testing some variations of linter script pytest.param("lint.py", ["-t", "create directory", get_rules_path()]), # `create directory` rule has native and .NET example PEs From 1888d0e7e3e54a0315df29c1ebe2a22d21cfefee Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:03:56 +0200 Subject: [PATCH 5/5] build(deps): bump setuptools from 69.5.1 to 70.0.0 (#2135) Bumps [setuptools](https://github.com/pypa/setuptools) from 69.5.1 to 70.0.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v69.5.1...v70.0.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b9c361ff1..6aefdb0a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -161,7 +161,7 @@ build = [ # These dependencies are not used in production environments # and should not conflict with other libraries/tooling. "pyinstaller==6.8.0", - "setuptools==69.5.1", + "setuptools==70.0.0", "build==1.2.1" ] diff --git a/requirements.txt b/requirements.txt index 004f98725..42f40b8fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,7 +35,7 @@ pyyaml==6.0.1 rich==13.7.1 ruamel-yaml==0.18.6 ruamel-yaml-clib==0.2.8 -setuptools==65.5.0 +setuptools==70.0.0 six==1.16.0 sortedcontainers==2.4.0 tabulate==0.9.0