diff --git a/CHANGELOG.md b/CHANGELOG.md index 57d720016..c58cd53a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### New Features - support analyzing DRAKVUF traces #2143 @yelhamer +- IDA extractor: extract names from dynamically resolved APIs stored in renamed global variables #2201 @Ana06 ### Breaking Changes diff --git a/README.md b/README.md index 882b5cb3d..c1cb9bee3 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,7 @@ Please learn to write rules and contribute new entries as you find interesting t If you use IDA Pro, then you can use the [capa explorer](https://github.com/mandiant/capa/tree/master/capa/ida/plugin) plugin. capa explorer helps you identify interesting areas of a program and build new capa rules using features extracted directly from your IDA Pro database. +It also uses your local changes to the .idb to extract better features, such as when you rename a global variable that contains a dynamically resolved API address. ![capa + IDA Pro integration](https://github.com/mandiant/capa/blob/master/doc/img/explorer_expanded.png) diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index e031b7a59..de0dbe6f8 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -5,9 +5,11 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
+import re from typing import Any, Dict, Tuple, Iterator import idc +import ida_ua import idaapi import idautils @@ -35,7 +37,7 @@ def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]: return ctx["externs_cache"] -def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[Any]: +def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> None: """check instruction for API call""" info = () ref = insn.ea @@ -52,7 +54,7 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[A except IndexError: break - info = funcs.get(ref, ()) + info = funcs.get(ref, None) if info: break @@ -60,8 +62,7 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Iterator[A if not f or not (f.flags & idaapi.FUNC_THUNK): break - if info: - yield info + return info def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: @@ -76,16 +77,38 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) if insn.get_canon_mnem() not in ("call", "jmp"): return - # check calls to imported functions - for api in check_for_api_call(insn, get_imports(fh.ctx)): + # check call to imported functions + api = check_for_api_call(insn, get_imports(fh.ctx)) + if api: # tuple (<module>, <function>, <ordinal>) for name in capa.features.extractors.helpers.generate_symbols(api[0], api[1]): yield API(name), ih.address + # a call instruction should only call one function, stop if a call to an import is extracted + return - # check calls to extern functions - for api in check_for_api_call(insn, get_externs(fh.ctx)): + # check call to extern functions + api = check_for_api_call(insn, get_externs(fh.ctx)) + if api: # tuple (<module>, <function>, <ordinal>) yield API(api[1]), ih.address + # a call instruction should only call one function, stop if a call to an extern is extracted + return + + # extract dynamically resolved APIs stored in renamed globals (renamed for example using `renimp.idc`) + if insn.Op1.type == 
ida_ua.o_mem: + op_addr = insn.Op1.addr + op_name = idaapi.get_name(op_addr) + # when renaming a global using an API name, IDA assigns it the function type + # ensure we do not extract something wrong by checking that the address has a name and a type + # we could check that the type is a function definition, but that complicates the code + if (not op_name.startswith("off_")) and idc.get_type(op_addr): + # Remove suffix used in repeated names, for example _0 in VirtualFree_0 + match = re.match(r"(.+)_\d+", op_name) + if match: + op_name = match.group(1) + # The global name does not include the DLL name, so we can't extract it + for name in capa.features.extractors.helpers.generate_symbols("", op_name): + yield API(name), ih.address # extract IDA/FLIRT recognized API functions targets = tuple(idautils.CodeRefsFrom(insn.ea, False)) diff --git a/capa/ida/plugin/README.md b/capa/ida/plugin/README.md index 4bf3616cb..b17157dca 100644 --- a/capa/ida/plugin/README.md +++ b/capa/ida/plugin/README.md @@ -81,6 +81,7 @@ can update using the `Settings` button. * Double-click the `Address` column to navigate your Disassembly view to the address of the associated feature * Double-click a result in the `Rule Information` column to expand its children * Select a checkbox in the `Rule Information` column to highlight the address of the associated feature in your Disassembly view +* Re-analyse if you rename global variables used to store dynamically resolved APIs as capa will use them to improve the analysis. #### Tips for Rule Generator diff --git a/pyproject.toml b/pyproject.toml index 9f0342b7f..f02dbb619 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -188,6 +188,7 @@ known_first_party = [ "ida_loader", "ida_nalt", "ida_segment", + "ida_ua", "idaapi", "idautils", "idc",