diff --git a/CHANGELOG.md b/CHANGELOG.md index 9343c28e4..e3f389af0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,16 +5,24 @@ ### New Features - ghidra: add Ghidra feature extractor and supporting code #1770 @colton-gabertan - ghidra: add entry script helping users run capa against a loaded Ghidra database #1767 @mike-hunhoff +- binja: add support for forwarded exports #1646 @xusheng6 +- binja: add support for symtab names #1504 @xusheng6 ### Breaking Changes -### New Rules (1) +### New Rules (4) - nursery/get-ntoskrnl-base-address @mr-tz +- host-interaction/network/connectivity/set-tcp-connection-state @johnk3r +- nursery/capture-process-snapshot-data @mr-tz +- collection/network/capture-packets-using-sharppcap jakub.jozwiak@mandiant.com - ### Bug Fixes - ghidra: fix ints_to_bytes performance #1761 @mike-hunhoff +- binja: improve function call site detection @xusheng6 +- binja: use binaryninja.load to open files @xusheng6 +- binja: bump binja version to 3.5 #1789 @xusheng6 ### capa explorer IDA Pro plugin diff --git a/README.md b/README.md index b60915131..a17b8d362 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa) [![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases) -[![Number of rules](https://img.shields.io/badge/rules-832-blue.svg)](https://github.com/mandiant/capa-rules) +[![Number of rules](https://img.shields.io/badge/rules-835-blue.svg)](https://github.com/mandiant/capa-rules) [![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster) [![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases) [![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt) diff --git 
a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py index d46451e77..84b25348b 100644 --- a/capa/features/extractors/binja/file.py +++ b/capa/features/extractors/binja/file.py @@ -17,7 +17,7 @@ from capa.features.file import Export, Import, Section, FunctionName from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress -from capa.features.extractors.binja.helpers import unmangle_c_name +from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[int, int]]: @@ -82,6 +82,24 @@ def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address if name != unmangled_name: yield Export(unmangled_name), AbsoluteVirtualAddress(sym.address) + for sym in bv.get_symbols_of_type(SymbolType.DataSymbol): + if sym.binding not in [SymbolBinding.GlobalBinding]: + continue + + name = sym.short_name + if not name.startswith("__forwarder_name"): + continue + + # Due to https://github.com/Vector35/binaryninja-api/issues/4641, in binja version 3.5, the symbol's name + # does not contain the DLL name. As a workaround, we read the C string at the symbol's address, which contains + # both the DLL name and the function name. + # Once the above issue is closed in the next binja stable release, we can update the code here to use the + # symbol name directly. 
+ name = read_c_string(bv, sym.address, 1024) + forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(name) + yield Export(forwarded_name), AbsoluteVirtualAddress(sym.address) + yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address) + def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: """extract function imports @@ -125,15 +143,17 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre """ for sym_name in bv.symbols: for sym in bv.symbols[sym_name]: - if sym.type == SymbolType.LibraryFunctionSymbol: - name = sym.short_name - yield FunctionName(name), sym.address - if name.startswith("_"): - # some linkers may prefix linked routines with a `_` to avoid name collisions. - # extract features for both the mangled and un-mangled representations. - # e.g. `_fwrite` -> `fwrite` - # see: https://stackoverflow.com/a/2628384/87207 - yield FunctionName(name[1:]), sym.address + if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]: + continue + + name = sym.short_name + yield FunctionName(name), sym.address + if name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield FunctionName(name[1:]), sym.address def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: diff --git a/capa/features/extractors/binja/function.py b/capa/features/extractors/binja/function.py index a502a5f44..520de0b3f 100644 --- a/capa/features/extractors/binja/function.py +++ b/capa/features/extractors/binja/function.py @@ -7,8 +7,9 @@ # See the License for the specific language governing permissions and limitations under the License. 
from typing import Tuple, Iterator -from binaryninja import Function, BinaryView, LowLevelILOperation +from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation +from capa.features.file import FunctionName from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress from capa.features.extractors import loops @@ -23,13 +24,27 @@ def extract_function_calls_to(fh: FunctionHandle): # Everything that is a code reference to the current function is considered a caller, which actually includes # many other references that are NOT a caller. For example, an instruction `push function_start` will also be # considered a caller to the function - if caller.llil is not None and caller.llil.operation in [ + llil = caller.llil + if (llil is None) or llil.operation not in [ LowLevelILOperation.LLIL_CALL, LowLevelILOperation.LLIL_CALL_STACK_ADJUST, LowLevelILOperation.LLIL_JUMP, LowLevelILOperation.LLIL_TAILCALL, ]: - yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address) + continue + + if llil.dest.value.type not in [ + RegisterValueType.ImportedAddressValue, + RegisterValueType.ConstantValue, + RegisterValueType.ConstantPointerValue, + ]: + continue + + address = llil.dest.value.value + if address != func.start: + continue + + yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address) def extract_function_loop(fh: FunctionHandle): @@ -59,10 +74,31 @@ def extract_recursive_call(fh: FunctionHandle): yield Characteristic("recursive call"), fh.address +def extract_function_name(fh: FunctionHandle): + """extract function names (e.g., symtab names)""" + func: Function = fh.inner + bv: BinaryView = func.view + if bv is None: + return + + for sym in bv.get_symbols(func.start): + if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]: + continue + + name = sym.short_name + yield FunctionName(name), sym.address + if 
name.startswith("_"): + # some linkers may prefix linked routines with a `_` to avoid name collisions. + # extract features for both the mangled and un-mangled representations. + # e.g. `_fwrite` -> `fwrite` + # see: https://stackoverflow.com/a/2628384/87207 + yield FunctionName(name[1:]), sym.address + + def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr -FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call) +FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name) diff --git a/capa/features/extractors/binja/helpers.py b/capa/features/extractors/binja/helpers.py index a96f64dab..0ce0f073b 100644 --- a/capa/features/extractors/binja/helpers.py +++ b/capa/features/extractors/binja/helpers.py @@ -9,7 +9,7 @@ from typing import List, Callable from dataclasses import dataclass -from binaryninja import LowLevelILInstruction +from binaryninja import BinaryView, LowLevelILInstruction from binaryninja.architecture import InstructionTextToken @@ -51,3 +51,19 @@ def unmangle_c_name(name: str) -> str: return match.group(1) return name + + +def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str: + s: List[str] = [] + while len(s) < max_len: + try: + c = bv.read(offset + len(s), 1)[0] + except Exception: + break + + if c == 0: + break + + s.append(chr(c)) + + return "".join(s) diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index 3144fd15a..f2b8fefc2 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -94,28 +94,32 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) candidate_addrs.append(stub_addr) for address in candidate_addrs: - sym = func.view.get_symbol_at(address) - if sym is None or sym.type not in 
[SymbolType.ImportAddressSymbol, SymbolType.ImportedFunctionSymbol]: - continue - - sym_name = sym.short_name - - lib_name = "" - import_lib = bv.lookup_imported_object_library(sym.address) - if import_lib is not None: - lib_name = import_lib[0].name - if lib_name.endswith(".dll"): - lib_name = lib_name[:-4] - elif lib_name.endswith(".so"): - lib_name = lib_name[:-3] - - for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name): - yield API(name), ih.address - - if sym_name.startswith("_"): - for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]): + for sym in func.view.get_symbols(address): + if sym is None or sym.type not in [ + SymbolType.ImportAddressSymbol, + SymbolType.ImportedFunctionSymbol, + SymbolType.FunctionSymbol, + ]: + continue + + sym_name = sym.short_name + + lib_name = "" + import_lib = bv.lookup_imported_object_library(sym.address) + if import_lib is not None: + lib_name = import_lib[0].name + if lib_name.endswith(".dll"): + lib_name = lib_name[:-4] + elif lib_name.endswith(".so"): + lib_name = lib_name[:-3] + + for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name): yield API(name), ih.address + if sym_name.startswith("_"): + for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]): + yield API(name), ih.address + def extract_insn_number_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle diff --git a/capa/main.py b/capa/main.py index b91ca1e26..ae8421560 100644 --- a/capa/main.py +++ b/capa/main.py @@ -558,7 +558,8 @@ def get_extractor( sys.path.append(str(bn_api)) try: - from binaryninja import BinaryView, BinaryViewType + import binaryninja + from binaryninja import BinaryView except ImportError: raise RuntimeError( "Cannot import binaryninja module. 
Please install the Binary Ninja Python API first: " @@ -568,7 +569,7 @@ def get_extractor( import capa.features.extractors.binja.extractor with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress): - bv: BinaryView = BinaryViewType.get_view_of_file(str(path)) + bv: BinaryView = binaryninja.load(str(path)) if bv is None: raise RuntimeError(f"Binary Ninja cannot open file {path}") diff --git a/doc/capa_quickstart.pdf b/doc/capa_quickstart.pdf new file mode 100644 index 000000000..dc9a5c6ca Binary files /dev/null and b/doc/capa_quickstart.pdf differ diff --git a/pyproject.toml b/pyproject.toml index 41d5cc623..4f798eeb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ dependencies = [ "ruamel.yaml==0.17.32", "vivisect==1.1.1", "pefile==2023.2.7", - "pyelftools==0.29", + "pyelftools==0.30", "dnfile==0.13.0", "dncil==1.0.2", "pydantic==2.1.1", @@ -62,7 +62,7 @@ packages = ["capa"] [project.optional-dependencies] dev = [ "pre-commit==3.4.0", - "pytest==7.4.0", + "pytest==7.4.2", "pytest-sugar==0.9.7", "pytest-instafail==0.5.0", "pytest-cov==4.1.0", @@ -77,7 +77,7 @@ dev = [ "flake8-simplify==0.20.0", "flake8-use-pathlib==0.3.0", "flake8-copyright==0.2.4", - "ruff==0.0.286", + "ruff==0.0.291", "black==23.7.0", "isort==5.11.4", "mypy==1.5.1", @@ -98,7 +98,7 @@ dev = [ build = [ "pyinstaller==5.10.1", "setuptools==68.0.0", - "build==0.10.0" + "build==1.0.3" ] [project.urls] diff --git a/rules b/rules index eba332e70..a1e83cf14 160000 --- a/rules +++ b/rules @@ -1 +1 @@ -Subproject commit eba332e702d88927b5816770a9853dd0b3fbc47a +Subproject commit a1e83cf1476199653650e1ca38f14bcce5aeb2c6 diff --git a/tests/data b/tests/data index faf741a53..87bd888e1 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit faf741a538224f52d4412468f910d52a70911662 +Subproject commit 87bd888e1984a1e9f9ab8e63b8707794392f3156 diff --git a/tests/fixtures.py b/tests/fixtures.py index cabbccf9f..05b60c877 100644 
--- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -159,7 +159,8 @@ def get_dnfile_extractor(path: Path): @lru_cache(maxsize=1) def get_binja_extractor(path: Path): - from binaryninja import Settings, BinaryViewType + import binaryninja + from binaryninja import Settings import capa.features.extractors.binja.extractor @@ -168,7 +169,7 @@ def get_binja_extractor(path: Path): if path.name.endswith("kernel32-64.dll_"): old_pdb = settings.get_bool("pdb.loadGlobalSymbols") settings.set_bool("pdb.loadGlobalSymbols", False) - bv = BinaryViewType.get_view_of_file(str(path)) + bv = binaryninja.load(str(path)) if path.name.endswith("kernel32-64.dll_"): settings.set_bool("pdb.loadGlobalSymbols", old_pdb) diff --git a/tests/test_binja_features.py b/tests/test_binja_features.py index a2f0cd78f..78addff7c 100644 --- a/tests/test_binja_features.py +++ b/tests/test_binja_features.py @@ -36,16 +36,10 @@ @pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") @fixtures.parametrize( "sample,scope,feature,expected", - fixtures.FEATURE_PRESENCE_TESTS, + fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS, indirect=["sample", "scope"], ) def test_binja_features(sample, scope, feature, expected): - if isinstance(feature, capa.features.file.Export) and "." 
in str(feature.value): - pytest.xfail("skip Binja unsupported forwarded export feature, see #1646") - - if feature == capa.features.common.Characteristic("forwarded export"): - pytest.xfail("skip Binja unsupported forwarded export feature, see #1646") - fixtures.do_test_feature_presence(fixtures.get_binja_extractor, sample, scope, feature, expected) @@ -69,4 +63,4 @@ def test_standalone_binja_backend(): @pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed") def test_binja_version(): version = binaryninja.core_version_info() - assert version.major == 3 and version.minor == 4 + assert version.major == 3 and version.minor == 5