-
Notifications
You must be signed in to change notification settings - Fork 567
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
binexport: init refactor for multi-arch instruction feature parsing
- Loading branch information
1 parent
210f127
commit 877134e
Showing
9 changed files
with
567 additions
and
373 deletions.
There are no files selected for viewing
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at: [package root]/LICENSE.txt | ||
# Unless required by applicable law or agreed to in writing, software distributed under the License | ||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and limitations under the License. | ||
import logging | ||
from typing import List, Tuple, Iterator, Optional | ||
|
||
import capa.features.extractors.binexport2.helpers | ||
from capa.features.insn import Number, Offset, OperandNumber, OperandOffset | ||
from capa.features.common import Feature, Characteristic | ||
from capa.features.address import Address | ||
from capa.features.extractors.binexport2 import FunctionContext, InstructionContext | ||
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle | ||
from capa.features.extractors.binexport2.helpers import ( | ||
mask_immediate, | ||
is_address_mapped, | ||
get_operand_expressions, | ||
get_operand_immediate_expression, | ||
) | ||
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def extract_insn_number_features(
    fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """Yield number features for the immediate operands of one instruction.

    Each operand is checked for an immediate expression via
    `get_operand_immediate_expression`. The immediate is passed through
    `mask_immediate` with the function's architecture, and values for which
    `is_address_mapped` is true are skipped. Every remaining value is emitted
    twice: as a `Number` and as an `OperandNumber` carrying the operand's
    position within the instruction.

    Args:
        fh: handle of the function that contains the instruction.
        _bbh: handle of the enclosing basic block (unused).
        ih: handle of the instruction to inspect.

    Yields:
        (feature, address) pairs located at the instruction's address.
    """
    func_ctx: FunctionContext = fh.inner
    insn_ctx: InstructionContext = ih.inner

    be2: BinExport2 = func_ctx.ctx.be2
    insn: BinExport2.Instruction = be2.instruction[insn_ctx.instruction_index]

    if not insn.operand_index:
        # nothing to inspect for operand-less instructions, like:
        #   .text:0040116e leave
        return

    for position, operand_index in enumerate(insn.operand_index):
        immediate: Optional[BinExport2.Expression] = get_operand_immediate_expression(
            be2, be2.operand[operand_index]
        )
        if not immediate:
            # operand has no immediate expression
            continue

        number: int = mask_immediate(func_ctx.arch, immediate.immediate)
        if is_address_mapped(be2, number):
            # the value is a mapped address, so it is not reported as a number
            continue

        yield Number(number), ih.address
        yield OperandNumber(position, number), ih.address
|
||
|
||
def extract_insn_offset_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """Yield offset features for the dereferencing operands of an `ldp` instruction.

    Only operands that contain a DEREFERENCE expression are considered, and
    only when the instruction mnemonic is `ldp`. The last expression of such
    an operand supplies the offset; operands with two or fewer expressions,
    or whose second expression's symbol ends with "sp", are skipped.

    NOTE(review): the source this block was recovered from had lost its
    indentation. The nesting below keeps every use of `value` inside the
    `ldp` branch, where it is assigned - confirm against the upstream file.

    Args:
        fh: handle of the function that contains the instruction.
        bbh: handle of the enclosing basic block (unused).
        ih: handle of the instruction to inspect.

    Yields:
        (feature, address) pairs: an `Offset` and an `OperandOffset` per
        qualifying operand, located at the instruction's address.
    """
    func_ctx: FunctionContext = fh.inner
    insn_ctx: InstructionContext = ih.inner

    be2: BinExport2 = func_ctx.ctx.be2
    insn: BinExport2.Instruction = be2.instruction[insn_ctx.instruction_index]

    if not insn.operand_index:
        # nothing to inspect for operand-less instructions, like:
        #   .text:0040116e leave
        return

    mnemonic: str = be2.mnemonic[insn.mnemonic_index].name.lower()

    for position, operand_index in enumerate(insn.operand_index):
        operand: BinExport2.Operand = be2.operand[operand_index]

        dereferences = any(
            be2.expression[expression_index].type == BinExport2.Expression.DEREFERENCE
            for expression_index in operand.expression_index
        )
        if not dereferences:
            continue

        if mnemonic != "ldp":
            continue

        # like:
        #   0013a2f0 ldp x22,x9,[x21, #0x18]
        expressions: List[BinExport2.Expression] = get_operand_expressions(be2, operand)
        if len(expressions) <= 2:
            continue

        if expressions[1].symbol.lower().endswith("sp"):
            # register symbol ends with "sp" - presumably a stack pointer access; skipped
            continue

        value = mask_immediate(func_ctx.arch, expressions[-1].immediate)
        if is_address_mapped(be2, value):
            # the value is a mapped address, so it is not reported as an offset
            continue

        value = capa.features.extractors.binexport2.helpers.twos_complement(func_ctx.arch, value)

        yield Offset(value), ih.address
        yield OperandOffset(position, value), ih.address
|
||
|
||
def extract_insn_nzxor_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """Yield the "nzxor" characteristic for a non-zeroing `eor` instruction.

    The feature is emitted when the mnemonic is `eor` and the second and
    third operands of the instruction compare unequal.

    Args:
        fh: handle of the function that contains the instruction.
        bbh: handle of the enclosing basic block (unused).
        ih: handle of the instruction to inspect.

    Yields:
        At most one (Characteristic("nzxor"), address) pair.
    """
    func_ctx: FunctionContext = fh.inner
    insn_ctx: InstructionContext = ih.inner

    be2: BinExport2 = func_ctx.ctx.be2
    insn: BinExport2.Instruction = be2.instruction[insn_ctx.instruction_index]

    if be2.mnemonic[insn.mnemonic_index].name.lower() != "eor":
        return

    operands: List[BinExport2.Operand] = [be2.operand[index] for index in insn.operand_index]

    # the comparison below relies on exactly three operands being present
    assert len(operands) == 3

    _, second, third = operands
    if second != third:
        yield Characteristic("nzxor"), ih.address
|
||
|
||
def extract_function_indirect_call_characteristic_features(
    fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle
) -> Iterator[Tuple[Feature, Address]]:
    """Placeholder extractor: currently yields no features.

    The arguments are accepted but not used. Delegating to an empty tuple
    keeps this function a generator, so callers can still iterate over it.
    """
    yield from ()
Empty file.
135 changes: 135 additions & 0 deletions
135
capa/features/extractors/binexport2/arch/intel/helpers.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
# Copyright (C) 2024 Mandiant, Inc. All Rights Reserved. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at: [package root]/LICENSE.txt | ||
# Unless required by applicable law or agreed to in writing, software distributed under the License | ||
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and limitations under the License. | ||
from typing import List, Optional | ||
from dataclasses import dataclass | ||
|
||
from capa.features.extractors.binexport2.helpers import get_operand_expressions | ||
from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 | ||
|
||
# Security cookie checks may perform non-zeroing XORs. These are expected
# within this many bytes inside the first and the returning basic blocks;
# knowing the range helps to reduce false-positive (FP) features.
SECURITY_COOKIE_BYTES_DELTA: int = 0x40
|
||
|
||
@dataclass
class OperandPhraseInfo:
    """Components of a memory-operand phrase; any component may be absent (None)."""

    # constant factor applied to the index
    scale: Optional[BinExport2.Expression] = None
    # index register
    index: Optional[BinExport2.Expression] = None
    # base register
    base: Optional[BinExport2.Expression] = None
    # integral offset
    displacement: Optional[BinExport2.Expression] = None
|
||
|
||
def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Optional[OperandPhraseInfo]:
    """Split a dereferencing operand into scale, index, base and displacement.

    The operand's expressions are fetched with `get_operand_expressions`, and
    everything up to and including the first DEREFERENCE expression is
    dropped. The caller is expected to have checked that the operand does
    contain a DEREFERENCE. The remaining expressions are matched by count:

      1: Displacement | Base
      3: Base + Index | Base + Displacement
      5: Base + Index + Displacement | Base + (Index * Scale) | (Index * Scale) + Displacement
      7: Base + (Index * Scale) + Displacement

    Args:
        be2: the BinExport2 message that owns the operand.
        operand: the operand to analyse.

    Returns:
        An `OperandPhraseInfo` with the recognised components set. None is
        only reachable when assertions are disabled and an expression has an
        unexpected type.

    Raises:
        NotImplementedError: for an unsupported expression count, or an
            unsupported operator combination in the five-expression form.
        AssertionError: when an expression has an unexpected type.
    """
    # assume the following (see https://blog.yossarian.net/2020/06/13/How-x86_64-addresses-memory):
    #
    #   Scale: A 2-bit constant factor
    #   Index: Any general purpose register
    #   Base: Any general purpose register
    #   Displacement: An integral offset

    expressions: List[BinExport2.Expression] = get_operand_expressions(be2, operand)

    # skip expressions up to and including BinExport2.Expression.DEREFERENCE, assume caller
    # has checked for BinExport2.Expression.DEREFERENCE
    for i, expression in enumerate(expressions):
        if expression.type == BinExport2.Expression.DEREFERENCE:
            expressions = expressions[i + 1 :]
            break

    if len(expressions) == 1:
        (expression0,) = expressions

        assert (
            expression0.type == BinExport2.Expression.IMMEDIATE_INT
            or expression0.type == BinExport2.Expression.REGISTER
        )

        if expression0.type == BinExport2.Expression.IMMEDIATE_INT:
            # Displacement
            return OperandPhraseInfo(displacement=expression0)
        elif expression0.type == BinExport2.Expression.REGISTER:
            # Base
            return OperandPhraseInfo(base=expression0)

    elif len(expressions) == 3:
        expression0, expression1, expression2 = expressions

        assert expression0.type == BinExport2.Expression.REGISTER
        assert expression1.type == BinExport2.Expression.OPERATOR
        assert (
            expression2.type == BinExport2.Expression.IMMEDIATE_INT
            or expression2.type == BinExport2.Expression.REGISTER
        )

        if expression2.type == BinExport2.Expression.REGISTER:
            # Base + Index
            return OperandPhraseInfo(base=expression0, index=expression2)
        elif expression2.type == BinExport2.Expression.IMMEDIATE_INT:
            # Base + Displacement
            return OperandPhraseInfo(base=expression0, displacement=expression2)

    elif len(expressions) == 5:
        expression0, expression1, expression2, expression3, expression4 = expressions

        assert expression0.type == BinExport2.Expression.REGISTER
        assert expression1.type == BinExport2.Expression.OPERATOR
        assert (
            expression2.type == BinExport2.Expression.REGISTER
            or expression2.type == BinExport2.Expression.IMMEDIATE_INT
        )
        assert expression3.type == BinExport2.Expression.OPERATOR
        assert expression4.type == BinExport2.Expression.IMMEDIATE_INT

        # expression3 is always the second operator, so the trailing scale or
        # displacement value is expression4 (the immediate), never expression3
        if expression1.symbol == "+" and expression3.symbol == "+":
            # Base + Index + Displacement
            return OperandPhraseInfo(base=expression0, index=expression2, displacement=expression4)
        elif expression1.symbol == "+" and expression3.symbol == "*":
            # Base + (Index * Scale)
            return OperandPhraseInfo(base=expression0, index=expression2, scale=expression4)
        elif expression1.symbol == "*" and expression3.symbol == "+":
            # (Index * Scale) + Displacement
            return OperandPhraseInfo(index=expression0, scale=expression2, displacement=expression4)
        else:
            raise NotImplementedError(expression1.symbol, expression3.symbol)

    elif len(expressions) == 7:
        (
            expression0,
            expression1,
            expression2,
            expression3,
            expression4,
            expression5,
            expression6,
        ) = expressions

        assert expression0.type == BinExport2.Expression.REGISTER
        assert expression1.type == BinExport2.Expression.OPERATOR
        assert expression2.type == BinExport2.Expression.REGISTER
        assert expression3.type == BinExport2.Expression.OPERATOR
        assert expression4.type == BinExport2.Expression.IMMEDIATE_INT
        assert expression5.type == BinExport2.Expression.OPERATOR
        assert expression6.type == BinExport2.Expression.IMMEDIATE_INT

        # Base + (Index * Scale) + Displacement
        return OperandPhraseInfo(base=expression0, index=expression2, scale=expression4, displacement=expression6)

    else:
        raise NotImplementedError(len(expressions))

    return None
Oops, something went wrong.