Skip to content

Commit

Permalink
binja: move the stack string detection to the function level. Fix man…
Browse files Browse the repository at this point in the history
  • Loading branch information
xusheng6 committed Dec 4, 2024
1 parent 4448d61 commit 87f44e2
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 117 deletions.
98 changes: 4 additions & 94 deletions capa/features/extractors/binja/basicblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,111 +5,22 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.

import string
from typing import Iterator

from binaryninja import Function
from binaryninja import Function, BinaryView
from binaryninja import BasicBlock as BinjaBasicBlock
from binaryninja import (
BinaryView,
SymbolType,
RegisterValueType,
VariableSourceType,
MediumLevelILOperation,
MediumLevelILBasicBlock,
MediumLevelILInstruction,
)

from capa.features.common import Feature, Characteristic
from capa.features.address import Address
from capa.features.basicblock import BasicBlock
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
from capa.features.extractors.base_extractor import BBHandle, FunctionHandle


def get_printable_len_ascii(s: bytes) -> int:
"""Return string length if all operand bytes are ascii or utf16-le printable"""
count = 0
for c in s:
if c == 0:
return count
if c < 127 and chr(c) in string.printable:
count += 1
return count


def get_printable_len_wide(s: bytes) -> int:
"""Return string length if all operand bytes are ascii or utf16-le printable"""
if all(c == 0x00 for c in s[1::2]):
return get_printable_len_ascii(s[::2])
return 0


def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
bv: BinaryView = f.view

if il.operation != MediumLevelILOperation.MLIL_CALL:
return 0

target = il.dest
if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
return 0

addr = target.value.value
sym = bv.get_symbol_at(addr)
if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
return 0

if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
return 0

if len(il.params) < 2:
return 0

dest = il.params[0]
if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
var = dest.src
else:
return 0

if var.source_type != VariableSourceType.StackVariableSourceType:
return 0

src = il.params[1]
if src.value.type != RegisterValueType.ConstantDataAggregateValue:
return 0

s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))


def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
"""check basic block for stackstring indicators
true if basic block contains enough moves of constant bytes to the stack
"""
count = 0
for il in bb:
count += get_stack_string_len(f, il)
if count > MIN_STACKSTRING_LEN:
return True

return False


def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
"""extract stackstring indicators from basic block"""
bb: tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
if bb[1] is not None and bb_contains_stackstring(fh.inner, bb[1]):
yield Characteristic("stack string"), bbh.address


def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
"""extract tight loop indicators from a basic block"""
bb: tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner
for edge in bb[0].outgoing_edges:
if edge.target.start == bb[0].start:
bb: BinjaBasicBlock = bbh.inner
for edge in bb.outgoing_edges:
if edge.target.start == bb.start:
yield Characteristic("tight loop"), bbh.address


Expand All @@ -123,5 +34,4 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Featur

BASIC_BLOCK_HANDLERS = (
extract_bb_tight_loop,
extract_bb_stackstring,
)
24 changes: 4 additions & 20 deletions capa/features/extractors/binja/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from typing import Iterator

import binaryninja as binja
from binaryninja import ILException

import capa.features.extractors.elf
import capa.features.extractors.binja.file
Expand Down Expand Up @@ -54,34 +53,19 @@ def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Featur

def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]:
f: binja.Function = fh.inner
# Set up a MLIL basic block dict look up to associate the disassembly basic block with its MLIL basic block
mlil_lookup = {}
try:
mlil = f.mlil
except ILException:
return

if mlil is None:
return

for mlil_bb in mlil.basic_blocks:
mlil_lookup[mlil_bb.source_block.start] = mlil_bb

for bb in f.basic_blocks:
mlil_bb = mlil_lookup.get(bb.start)

yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb))
yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=bb)

def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]:
yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh)

def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]:
import capa.features.extractors.binja.helpers as binja_helpers

bb: tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner
addr = bb[0].start
bb: binja.BasicBlock = bbh.inner
addr = bb.start

for text, length in bb[0]:
for text, length in bb:
insn = binja_helpers.DisassemblyInstruction(addr, length, text)
yield InsnHandle(address=AbsoluteVirtualAddress(addr), inner=insn)
addr += length
Expand Down
110 changes: 108 additions & 2 deletions capa/features/extractors/binja/function.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,27 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import string
from typing import Iterator

from binaryninja import Function, BinaryView, SymbolType, LowLevelILOperation
from binaryninja import (
Function,
BinaryView,
SymbolType,
ILException,
RegisterValueType,
VariableSourceType,
LowLevelILOperation,
MediumLevelILOperation,
MediumLevelILBasicBlock,
MediumLevelILInstruction,
)

from capa.features.file import FunctionName
from capa.features.common import Feature, Characteristic
from capa.features.address import Address, AbsoluteVirtualAddress
from capa.features.extractors import loops
from capa.features.extractors.helpers import MIN_STACKSTRING_LEN
from capa.features.extractors.binja.helpers import get_llil_instr_at_addr
from capa.features.extractors.base_extractor import FunctionHandle

Expand Down Expand Up @@ -95,10 +108,103 @@ def extract_function_name(fh: FunctionHandle):
yield FunctionName(name[1:]), sym.address


def get_printable_len_ascii(s: bytes) -> int:
"""Return string length if all operand bytes are ascii or utf16-le printable"""
count = 0
for c in s:
if c == 0:
return count
if c < 127 and chr(c) in string.printable:
count += 1
return count


def get_printable_len_wide(s: bytes) -> int:
"""Return string length if all operand bytes are ascii or utf16-le printable"""
if all(c == 0x00 for c in s[1::2]):
return get_printable_len_ascii(s[::2])
return 0


def get_stack_string_len(f: Function, il: MediumLevelILInstruction) -> int:
bv: BinaryView = f.view

if il.operation != MediumLevelILOperation.MLIL_CALL:
return 0

target = il.dest
if target.operation not in [MediumLevelILOperation.MLIL_CONST, MediumLevelILOperation.MLIL_CONST_PTR]:
return 0

addr = target.value.value
sym = bv.get_symbol_at(addr)
if not sym or sym.type not in [SymbolType.LibraryFunctionSymbol, SymbolType.SymbolicFunctionSymbol]:
return 0

if sym.name not in ["__builtin_strncpy", "__builtin_strcpy", "__builtin_wcscpy"]:
return 0

if len(il.params) < 2:
return 0

dest = il.params[0]
if dest.operation in [MediumLevelILOperation.MLIL_ADDRESS_OF, MediumLevelILOperation.MLIL_VAR]:
var = dest.src
else:
return 0

if var.source_type != VariableSourceType.StackVariableSourceType:
return 0

src = il.params[1]
if src.value.type != RegisterValueType.ConstantDataAggregateValue:
return 0

s = f.get_constant_data(RegisterValueType.ConstantDataAggregateValue, src.value.value)
return max(get_printable_len_ascii(bytes(s)), get_printable_len_wide(bytes(s)))


def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool:
"""check basic block for stackstring indicators
true if basic block contains enough moves of constant bytes to the stack
"""
count = 0
for il in bb:
count += get_stack_string_len(f, il)
if count > MIN_STACKSTRING_LEN:
return True

return False


def extract_stackstring(fh: FunctionHandle):
"""extract stackstring indicators"""
func: Function = fh.inner
bv: BinaryView = func.view
if bv is None:
return

try:
mlil = func.mlil
except ILException:
return

for block in mlil.basic_blocks:
if bb_contains_stackstring(func, block):
yield Characteristic("stack string"), block.source_block.start


def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]:
for func_handler in FUNCTION_HANDLERS:
for feature, addr in func_handler(fh):
yield feature, addr


FUNCTION_HANDLERS = (extract_function_calls_to, extract_function_loop, extract_recursive_call, extract_function_name)
FUNCTION_HANDLERS = (
extract_function_calls_to,
extract_function_loop,
extract_recursive_call,
extract_function_name,
extract_stackstring,
)
2 changes: 1 addition & 1 deletion capa/features/extractors/binja/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index
# e.g., <llil: eax = 0>, (LLIL_SET_REG). So we do not need to check whether the two operands are the same.
if il.operation == LowLevelILOperation.LLIL_XOR:
# Exclude cases related to the stack cookie
if is_nzxor_stack_cookie(fh.inner, bbh.inner[0], il):
if is_nzxor_stack_cookie(fh.inner, bbh.inner, il):
return False
results.append((Characteristic("nzxor"), ih.address))
return False
Expand Down

0 comments on commit 87f44e2

Please sign in to comment.