Skip to content

Commit

Permalink
Merge pull request #2511 from xusheng6/fix_llil_access
Browse files Browse the repository at this point in the history
binja: retrieve the LLIL instruction itself without requesting the entire IL function
  • Loading branch information
mr-tz authored Dec 2, 2024
2 parents 5a284de + b6763ac commit abe8084
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 50 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
- ghidra: fix saving of base address @mr-tz
- binja: support loading raw x86/x86_64 shellcode #2489 @xusheng6
- binja: fix crash when the IL of certain functions are not available. #2249 @xusheng6
- binja: major performance improvement on the binja extractor. #1414 @xusheng6

### capa Explorer Web

Expand Down
21 changes: 7 additions & 14 deletions capa/features/extractors/binja/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
# See the License for the specific language governing permissions and limitations under the License.
from typing import Iterator

from binaryninja import Function, BinaryView, SymbolType, ILException, RegisterValueType, LowLevelILOperation
from binaryninja import Function, BinaryView, SymbolType, LowLevelILOperation

from capa.features.file import FunctionName
from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors import loops
from capa.features.extractors.binja.helpers import get_llil_instr_at_addr
from capa.features.extractors.base_extractor import FunctionHandle


Expand All @@ -24,14 +25,7 @@ def extract_function_calls_to(fh: FunctionHandle):
# Everything that is a code reference to the current function is considered a caller, which actually includes
# many other references that are NOT a caller. For example, an instruction `push function_start` will also be
# considered a caller to the function
llil = None
try:
# Temporary fix for https://github.com/Vector35/binaryninja-api/issues/6020. Since `.llil` can throw an
# exception rather than returning None
llil = caller.llil
except ILException:
continue

llil = get_llil_instr_at_addr(func.view, caller.address)
if (llil is None) or llil.operation not in [
LowLevelILOperation.LLIL_CALL,
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
Expand All @@ -40,14 +34,13 @@ def extract_function_calls_to(fh: FunctionHandle):
]:
continue

if llil.dest.value.type not in [
RegisterValueType.ImportedAddressValue,
RegisterValueType.ConstantValue,
RegisterValueType.ConstantPointerValue,
if llil.dest.operation not in [
LowLevelILOperation.LLIL_CONST,
LowLevelILOperation.LLIL_CONST_PTR,
]:
continue

address = llil.dest.value.value
address = llil.dest.constant
if address != func.start:
continue

Expand Down
14 changes: 12 additions & 2 deletions capa/features/extractors/binja/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import re
from typing import Callable
from typing import Callable, Optional
from dataclasses import dataclass

from binaryninja import BinaryView, LowLevelILInstruction
from binaryninja import BinaryView, LowLevelILFunction, LowLevelILInstruction
from binaryninja.architecture import InstructionTextToken


Expand Down Expand Up @@ -67,3 +67,13 @@ def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
s.append(chr(c))

return "".join(s)


def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInstruction]:
arch = bv.arch
buffer = bv.read(addr, arch.max_instr_length)
llil = LowLevelILFunction(arch=arch)
llil.current_address = addr
if arch.get_instruction_low_level_il(buffer, addr, llil) == 0:
return None
return llil[0]
50 changes: 16 additions & 34 deletions capa/features/extractors/binja/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
BinaryView,
ILRegister,
SymbolType,
ILException,
BinaryReader,
RegisterValueType,
LowLevelILOperation,
Expand All @@ -24,7 +23,7 @@
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs, get_llil_instr_at_addr
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle

# security cookie checks may perform non-zeroing XORs, these are expected within a certain
Expand All @@ -37,40 +36,23 @@
# 2. The function must only make one call/jump to another address
# If the function being checked is a stub function, returns the target address. Otherwise, return None.
def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]:
funcs = bv.get_functions_at(addr)
for func in funcs:
if len(func.basic_blocks) != 1:
continue

call_count = 0
call_target = None
try:
llil = func.llil
except ILException:
return None
llil = get_llil_instr_at_addr(bv, addr)
if llil is None or llil.operation not in [
LowLevelILOperation.LLIL_CALL,
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
LowLevelILOperation.LLIL_JUMP,
LowLevelILOperation.LLIL_TAILCALL,
]:
return None

if llil is None:
continue
if llil.dest.value.type not in [
RegisterValueType.ImportedAddressValue,
RegisterValueType.ConstantValue,
RegisterValueType.ConstantPointerValue,
]:
return None

for il in llil.instructions:
if il.operation in [
LowLevelILOperation.LLIL_CALL,
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
LowLevelILOperation.LLIL_JUMP,
LowLevelILOperation.LLIL_TAILCALL,
]:
call_count += 1
if il.dest.value.type in [
RegisterValueType.ImportedAddressValue,
RegisterValueType.ConstantValue,
RegisterValueType.ConstantPointerValue,
]:
call_target = il.dest.value.value

if call_count == 1 and call_target is not None:
return call_target

return None
return llil.dest.value.value


def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:
Expand Down

0 comments on commit abe8084

Please sign in to comment.