Skip to content

Commit

Permalink
ida extractor: extract APIs from renamed globals
Browse files Browse the repository at this point in the history
Add support to extract dynamically resolved APIs stored in global
variables that have been renamed (for example using the `renimp.idc`
script included with IDA Pro).
  • Loading branch information
Ana06 committed Aug 8, 2024
1 parent 1564f24 commit 658284d
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

### New Features
- support analyzing DRAKVUF traces #2143 @yelhamer
- IDA extractor: extract names from dynamically resolved APIs stored in renamed global variables #2201 @Ana06

### Breaking Changes

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ Please learn to write rules and contribute new entries as you find interesting t
If you use IDA Pro, then you can use the [capa explorer](https://github.com/mandiant/capa/tree/master/capa/ida/plugin) plugin.
capa explorer helps you identify interesting areas of a program and build new capa rules using features extracted directly from your IDA Pro database.
It also uses your local changes to the .idb to extract better features, such as when you rename a global variable that contains a dynamically resolved API address.
![capa + IDA Pro integration](https://github.com/mandiant/capa/blob/master/doc/img/explorer_expanded.png)
Expand Down
39 changes: 31 additions & 8 deletions capa/features/extractors/ida/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import re
from typing import Any, Dict, Tuple, Iterator

import idc
import ida_ua
import idaapi
import idautils

Expand Down Expand Up @@ -35,7 +37,7 @@ def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]:
return ctx["externs_cache"]


def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[Any]:
def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> None:
"""check instruction for API call"""
info = ()
ref = insn.ea
Expand All @@ -52,16 +54,15 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[A
except IndexError:
break

info = funcs.get(ref, ())
info = funcs.get(ref, None)
if info:
break

f = idaapi.get_func(ref)
if not f or not (f.flags & idaapi.FUNC_THUNK):
break

if info:
yield info
return info


def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]:
Expand All @@ -76,16 +77,38 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle)
if insn.get_canon_mnem() not in ("call", "jmp"):
return

# check calls to imported functions
for api in check_for_api_call(insn, get_imports(fh.ctx)):
# check call to imported functions
api = check_for_api_call(insn, get_imports(fh.ctx))
if api:
# tuple (<module>, <function>, <ordinal>)
for name in capa.features.extractors.helpers.generate_symbols(api[0], api[1]):
yield API(name), ih.address
# a call instruction should only call one function, stop if a call to an import is extracted
return

# check calls to extern functions
for api in check_for_api_call(insn, get_externs(fh.ctx)):
# check call to extern functions
api = check_for_api_call(insn, get_externs(fh.ctx))
if api:
# tuple (<module>, <function>, <ordinal>)
yield API(api[1]), ih.address
# a call instruction should only call one function, stop if a call to an extern is extracted
return

# extract dynamically resolved APIs stored in renamed globals (renamed for example using `renimp.idc`)
if insn.Op1.type == ida_ua.o_mem:
op_addr = insn.Op1.addr
op_name = idaapi.get_name(op_addr)
# when renaming a global using an API name, IDA assigns it the function type
# ensure we do not extract something wrong by checking that the address has a name and a type
# we could check that the type is a function definition, but that complicates the code
if (not op_name.startswith("off_")) and idc.get_type(op_addr):
# Remove suffix used in repeated names, for example _0 in VirtualFree_0
match = re.match(r"(.+)_\d+", op_name)
if match:
op_name = match.group(1)
# We global name does not include the DLL name, so we can't extract it
for name in capa.features.extractors.helpers.generate_symbols("", op_name):
yield API(name), ih.address

# extract IDA/FLIRT recognized API functions
targets = tuple(idautils.CodeRefsFrom(insn.ea, False))
Expand Down
1 change: 1 addition & 0 deletions capa/ida/plugin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ can update using the `Settings` button.
* Double-click the `Address` column to navigate your Disassembly view to the address of the associated feature
* Double-click a result in the `Rule Information` column to expand its children
* Select a checkbox in the `Rule Information` column to highlight the address of the associated feature in your Disassembly view
* Re-analyse if you rename global variables used to store dynamically resolved APIs as capa will use them to improve the analysis.
#### Tips for Rule Generator
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ known_first_party = [
"ida_loader",
"ida_nalt",
"ida_segment",
"ida_ua",
"idaapi",
"idautils",
"idc",
Expand Down

0 comments on commit 658284d

Please sign in to comment.