Skip to content

Commit

Permalink
Merge branch 'master' into Aayush-Goel-04/Issue#322
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-tz authored Oct 9, 2023
2 parents 8331ed6 + b315aac commit a9daa92
Show file tree
Hide file tree
Showing 13 changed files with 135 additions and 55 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,24 @@
### New Features
- ghidra: add Ghidra feature extractor and supporting code #1770 @colton-gabertan
- ghidra: add entry script helping users run capa against a loaded Ghidra database #1767 @mike-hunhoff
- binja: add support for forwarded exports #1646 @xusheng6
- binja: add support for symtab names #1504 @xusheng6

### Breaking Changes

### New Rules (1)
### New Rules (4)

- nursery/get-ntoskrnl-base-address @mr-tz
- host-interaction/network/connectivity/set-tcp-connection-state @johnk3r
- nursery/capture-process-snapshot-data @mr-tz
- collection/network/capture-packets-using-sharppcap [email protected]
-

### Bug Fixes
- ghidra: fix ints_to_bytes performance #1761 @mike-hunhoff
- binja: improve function call site detection @xusheng6
- binja: use binaryninja.load to open files @xusheng6
- binja: bump binja version to 3.5 #1789 @xusheng6

### capa explorer IDA Pro plugin

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/flare-capa)](https://pypi.org/project/flare-capa)
[![Last release](https://img.shields.io/github/v/release/mandiant/capa)](https://github.com/mandiant/capa/releases)
[![Number of rules](https://img.shields.io/badge/rules-832-blue.svg)](https://github.com/mandiant/capa-rules)
[![Number of rules](https://img.shields.io/badge/rules-835-blue.svg)](https://github.com/mandiant/capa-rules)
[![CI status](https://github.com/mandiant/capa/workflows/CI/badge.svg)](https://github.com/mandiant/capa/actions?query=workflow%3ACI+event%3Apush+branch%3Amaster)
[![Downloads](https://img.shields.io/github/downloads/mandiant/capa/total)](https://github.com/mandiant/capa/releases)
[![License](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE.txt)
Expand Down
40 changes: 30 additions & 10 deletions capa/features/extractors/binja/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from capa.features.file import Export, Import, Section, FunctionName
from capa.features.common import FORMAT_PE, FORMAT_ELF, Format, String, Feature, Characteristic
from capa.features.address import NO_ADDRESS, Address, FileOffsetAddress, AbsoluteVirtualAddress
from capa.features.extractors.binja.helpers import unmangle_c_name
from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name


def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[int, int]]:
Expand Down Expand Up @@ -82,6 +82,24 @@ def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address
if name != unmangled_name:
yield Export(unmangled_name), AbsoluteVirtualAddress(sym.address)

for sym in bv.get_symbols_of_type(SymbolType.DataSymbol):
if sym.binding not in [SymbolBinding.GlobalBinding]:
continue

name = sym.short_name
if not name.startswith("__forwarder_name"):
continue

# Due to https://github.com/Vector35/binaryninja-api/issues/4641, in binja version 3.5, the symbol's name
# does not contain the DLL name. As a workaround, we read the C string at the symbol's address, which contains
# both the DLL name and the function name.
# Once the above issue is closed in the next binjs stable release, we can update the code here to use the
# symbol name directly.
name = read_c_string(bv, sym.address, 1024)
forwarded_name = capa.features.extractors.helpers.reformat_forwarded_export_name(name)
yield Export(forwarded_name), AbsoluteVirtualAddress(sym.address)
yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address)


def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
"""extract function imports
Expand Down Expand Up @@ -125,15 +143,17 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre
"""
for sym_name in bv.symbols:
for sym in bv.symbols[sym_name]:
if sym.type == SymbolType.LibraryFunctionSymbol:
name = sym.short_name
yield FunctionName(name), sym.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), sym.address
if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]:
continue

name = sym.short_name
yield FunctionName(name), sym.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), sym.address


def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]:
Expand Down
44 changes: 40 additions & 4 deletions capa/features/extractors/binja/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
# See the License for the specific language governing permissions and limitations under the License.
from typing import Tuple, Iterator

from binaryninja import Function, BinaryView, LowLevelILOperation
from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation

from capa.features.file import FunctionName
from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors import loops
Expand All @@ -23,13 +24,27 @@ def extract_function_calls_to(fh: FunctionHandle):
# Everything that is a code reference to the current function is considered a caller, which actually includes
# many other references that are NOT a caller. For example, an instruction `push function_start` will also be
# considered a caller to the function
if caller.llil is not None and caller.llil.operation in [
llil = caller.llil
if (llil is None) or llil.operation not in [
LowLevelILOperation.LLIL_CALL,
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
LowLevelILOperation.LLIL_JUMP,
LowLevelILOperation.LLIL_TAILCALL,
]:
yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address)
continue

if llil.dest.value.type not in [
RegisterValueType.ImportedAddressValue,
RegisterValueType.ConstantValue,
RegisterValueType.ConstantPointerValue,
]:
continue

address = llil.dest.value.value
if address != func.start:
continue

yield Characteristic("calls to"), AbsoluteVirtualAddress(caller.address)


def extract_function_loop(fh: FunctionHandle):
Expand Down Expand Up @@ -59,10 +74,31 @@ def extract_recursive_call(fh: FunctionHandle):
yield Characteristic("recursive call"), fh.address


def extract_function_name(fh: FunctionHandle):
"""extract function names (e.g., symtab names)"""
func: Function = fh.inner
bv: BinaryView = func.view
if bv is None:
return

for sym in bv.get_symbols(func.start):
if sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.FunctionSymbol]:
continue

name = sym.short_name
yield FunctionName(name), sym.address
if name.startswith("_"):
# some linkers may prefix linked routines with a `_` to avoid name collisions.
# extract features for both the mangled and un-mangled representations.
# e.g. `_fwrite` -> `fwrite`
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), sym.address


def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]:
for func_handler in FUNCTION_HANDLERS:
for feature, addr in func_handler(fh):
yield feature, addr


FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call)
FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
18 changes: 17 additions & 1 deletion capa/features/extractors/binja/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import List, Callable
from dataclasses import dataclass

from binaryninja import LowLevelILInstruction
from binaryninja import BinaryView, LowLevelILInstruction
from binaryninja.architecture import InstructionTextToken


Expand Down Expand Up @@ -51,3 +51,19 @@ def unmangle_c_name(name: str) -> str:
return match.group(1)

return name


def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
s: List[str] = []
while len(s) < max_len:
try:
c = bv.read(offset + len(s), 1)[0]
except Exception:
break

if c == 0:
break

s.append(chr(c))

return "".join(s)
44 changes: 24 additions & 20 deletions capa/features/extractors/binja/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,28 +94,32 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
candidate_addrs.append(stub_addr)

for address in candidate_addrs:
sym = func.view.get_symbol_at(address)
if sym is None or sym.type not in [SymbolType.ImportAddressSymbol, SymbolType.ImportedFunctionSymbol]:
continue

sym_name = sym.short_name

lib_name = ""
import_lib = bv.lookup_imported_object_library(sym.address)
if import_lib is not None:
lib_name = import_lib[0].name
if lib_name.endswith(".dll"):
lib_name = lib_name[:-4]
elif lib_name.endswith(".so"):
lib_name = lib_name[:-3]

for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name):
yield API(name), ih.address

if sym_name.startswith("_"):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]):
for sym in func.view.get_symbols(address):
if sym is None or sym.type not in [
SymbolType.ImportAddressSymbol,
SymbolType.ImportedFunctionSymbol,
SymbolType.FunctionSymbol,
]:
continue

sym_name = sym.short_name

lib_name = ""
import_lib = bv.lookup_imported_object_library(sym.address)
if import_lib is not None:
lib_name = import_lib[0].name
if lib_name.endswith(".dll"):
lib_name = lib_name[:-4]
elif lib_name.endswith(".so"):
lib_name = lib_name[:-3]

for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name):
yield API(name), ih.address

if sym_name.startswith("_"):
for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym_name[1:]):
yield API(name), ih.address


def extract_insn_number_features(
fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
Expand Down
5 changes: 3 additions & 2 deletions capa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,8 @@ def get_extractor(
sys.path.append(str(bn_api))

try:
from binaryninja import BinaryView, BinaryViewType
import binaryninja
from binaryninja import BinaryView
except ImportError:
raise RuntimeError(
"Cannot import binaryninja module. Please install the Binary Ninja Python API first: "
Expand All @@ -568,7 +569,7 @@ def get_extractor(
import capa.features.extractors.binja.extractor

with halo.Halo(text="analyzing program", spinner="simpleDots", stream=sys.stderr, enabled=not disable_progress):
bv: BinaryView = BinaryViewType.get_view_of_file(str(path))
bv: BinaryView = binaryninja.load(str(path))
if bv is None:
raise RuntimeError(f"Binary Ninja cannot open file {path}")

Expand Down
Binary file added doc/capa_quickstart.pdf
Binary file not shown.
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ dependencies = [
"ruamel.yaml==0.17.32",
"vivisect==1.1.1",
"pefile==2023.2.7",
"pyelftools==0.29",
"pyelftools==0.30",
"dnfile==0.13.0",
"dncil==1.0.2",
"pydantic==2.1.1",
Expand All @@ -62,7 +62,7 @@ packages = ["capa"]
[project.optional-dependencies]
dev = [
"pre-commit==3.4.0",
"pytest==7.4.0",
"pytest==7.4.2",
"pytest-sugar==0.9.7",
"pytest-instafail==0.5.0",
"pytest-cov==4.1.0",
Expand All @@ -77,7 +77,7 @@ dev = [
"flake8-simplify==0.20.0",
"flake8-use-pathlib==0.3.0",
"flake8-copyright==0.2.4",
"ruff==0.0.286",
"ruff==0.0.291",
"black==23.7.0",
"isort==5.11.4",
"mypy==1.5.1",
Expand All @@ -98,7 +98,7 @@ dev = [
build = [
"pyinstaller==5.10.1",
"setuptools==68.0.0",
"build==0.10.0"
"build==1.0.3"
]

[project.urls]
Expand Down
5 changes: 3 additions & 2 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ def get_dnfile_extractor(path: Path):

@lru_cache(maxsize=1)
def get_binja_extractor(path: Path):
from binaryninja import Settings, BinaryViewType
import binaryninja
from binaryninja import Settings

import capa.features.extractors.binja.extractor

Expand All @@ -168,7 +169,7 @@ def get_binja_extractor(path: Path):
if path.name.endswith("kernel32-64.dll_"):
old_pdb = settings.get_bool("pdb.loadGlobalSymbols")
settings.set_bool("pdb.loadGlobalSymbols", False)
bv = BinaryViewType.get_view_of_file(str(path))
bv = binaryninja.load(str(path))
if path.name.endswith("kernel32-64.dll_"):
settings.set_bool("pdb.loadGlobalSymbols", old_pdb)

Expand Down
10 changes: 2 additions & 8 deletions tests/test_binja_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,10 @@
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
@fixtures.parametrize(
"sample,scope,feature,expected",
fixtures.FEATURE_PRESENCE_TESTS,
fixtures.FEATURE_PRESENCE_TESTS + fixtures.FEATURE_SYMTAB_FUNC_TESTS,
indirect=["sample", "scope"],
)
def test_binja_features(sample, scope, feature, expected):
if isinstance(feature, capa.features.file.Export) and "." in str(feature.value):
pytest.xfail("skip Binja unsupported forwarded export feature, see #1646")

if feature == capa.features.common.Characteristic("forwarded export"):
pytest.xfail("skip Binja unsupported forwarded export feature, see #1646")

fixtures.do_test_feature_presence(fixtures.get_binja_extractor, sample, scope, feature, expected)


Expand All @@ -69,4 +63,4 @@ def test_standalone_binja_backend():
@pytest.mark.skipif(binja_present is False, reason="Skip binja tests if the binaryninja Python API is not installed")
def test_binja_version():
version = binaryninja.core_version_info()
assert version.major == 3 and version.minor == 4
assert version.major == 3 and version.minor == 5

0 comments on commit a9daa92

Please sign in to comment.