From 683524b902efd107ef6d6aa712677060274ffa86 Mon Sep 17 00:00:00 2001 From: mr-tz Date: Fri, 20 Oct 2023 10:59:51 +0200 Subject: [PATCH] keep DLL name for import features --- capa/features/extractors/binja/file.py | 4 ++-- capa/features/extractors/cape/file.py | 2 +- capa/features/extractors/dotnetfile.py | 2 +- capa/features/extractors/ghidra/file.py | 2 +- capa/features/extractors/ghidra/helpers.py | 2 +- capa/features/extractors/helpers.py | 25 ++++++++++++++++------ capa/features/extractors/ida/file.py | 4 ++-- capa/features/extractors/pefile.py | 2 +- capa/features/extractors/viv/file.py | 2 +- tests/fixtures.py | 11 ++++------ 10 files changed, 33 insertions(+), 23 deletions(-) diff --git a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py index 84b25348b..0054e62b1 100644 --- a/capa/features/extractors/binja/file.py +++ b/capa/features/extractors/binja/file.py @@ -115,13 +115,13 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address for sym in bv.get_symbols_of_type(SymbolType.ImportAddressSymbol): lib_name = str(sym.namespace) addr = AbsoluteVirtualAddress(sym.address) - for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name): + for name in capa.features.extractors.helpers.generate_symbols(lib_name, sym.short_name, include_dll=True): yield Import(name), addr ordinal = sym.ordinal if ordinal != 0 and (lib_name != ""): ordinal_name = f"#{ordinal}" - for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name): + for name in capa.features.extractors.helpers.generate_symbols(lib_name, ordinal_name, include_dll=True): yield Import(name), addr diff --git a/capa/features/extractors/cape/file.py b/capa/features/extractors/cape/file.py index 67ef0fe2e..3143504c0 100644 --- a/capa/features/extractors/cape/file.py +++ b/capa/features/extractors/cape/file.py @@ -58,7 +58,7 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address] if not function.name: continue - for name in generate_symbols(library.dll, function.name): + for name in generate_symbols(library.dll, function.name, include_dll=True): yield Import(name), AbsoluteVirtualAddress(function.address) diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index ff942ae72..a9d36d299 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -57,7 +57,7 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor for imp in get_dotnet_unmanaged_imports(pe): # like kernel32.CreateFileA - for name in capa.features.extractors.helpers.generate_symbols(imp.module, imp.method): + for name in capa.features.extractors.helpers.generate_symbols(imp.module, imp.method, include_dll=True): yield Import(name), DNTokenAddress(imp.token) diff --git a/capa/features/extractors/ghidra/file.py b/capa/features/extractors/ghidra/file.py index 047205022..118575c17 100644 --- a/capa/features/extractors/ghidra/file.py +++ b/capa/features/extractors/ghidra/file.py @@ -112,7 +112,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: if "Ordinal_" in fstr[1]: fstr[1] = f"#{fstr[1].split('_')[1]}" - for name in capa.features.extractors.helpers.generate_symbols(fstr[0][:-4], fstr[1]): + for name in capa.features.extractors.helpers.generate_symbols(fstr[0][:-4], fstr[1], include_dll=True): yield Import(name), AbsoluteVirtualAddress(addr) diff --git a/capa/features/extractors/ghidra/helpers.py b/capa/features/extractors/ghidra/helpers.py index 0f405870d..2e953437f 100644 --- a/capa/features/extractors/ghidra/helpers.py +++ b/capa/features/extractors/ghidra/helpers.py @@ -102,7 +102,7 @@ def get_file_imports() -> Dict[int, List[str]]: # mostly shows up in ELF files, otherwise, strip '.dll' w/ [:-4] fstr[0] = "*" if "" in fstr[0] else fstr[0][:-4] - for name in capa.features.extractors.helpers.generate_symbols(fstr[0], fstr[1]): + for name in capa.features.extractors.helpers.generate_symbols(fstr[0], fstr[1], include_dll=True): import_dict.setdefault(addr, []).append(name) if ex_loc: import_dict.setdefault(ex_loc.getOffset(), []).append(name) diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py index 8274e67e5..71d28ef52 100644 --- a/capa/features/extractors/helpers.py +++ b/capa/features/extractors/helpers.py @@ -41,7 +41,7 @@ def is_ordinal(symbol: str) -> bool: return False -def generate_symbols(dll: str, symbol: str) -> Iterator[str]: +def generate_symbols(dll: str, symbol: str, include_dll=False) -> Iterator[str]: """ for a given dll and symbol name, generate variants. we over-generate features to make matching easier. @@ -50,22 +50,35 @@ def generate_symbols(dll: str, symbol: str) -> Iterator[str]: - CreateFile - ws2_32.#1 - note that since v7 dll names are NOT included anymore except for ordinals - dlls are good for documentation but not used during matching + note that since capa v7 only `import` features include DLL names: + - kernel32.CreateFileA + - kernel32.CreateFile + + for `api` features dll names are good for documentation but not used during matching """ # normalize dll name dll = dll.lower() + # trim extensions observed in dynamic traces + dll = dll[0:-4] if dll.endswith(".dll") else dll + dll = dll[0:-4] if dll.endswith(".drv") else dll + + if include_dll: + # ws2_32.#1 + # kernel32.CreateFileA + yield f"{dll}.{symbol}" + if not is_ordinal(symbol): # CreateFileA yield symbol + if include_dll: + # kernel32.CreateFile + yield f"{dll}.{symbol[:-1]}" + if is_aw_function(symbol): # CreateFile yield symbol[:-1] - elif dll: - # ws2_32.#1 - yield f"{dll}.{symbol}" def reformat_forwarded_export_name(forwarded_name: str) -> str: diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index efa4b66c7..24f9528fd 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -110,7 +110,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: if info[1] and info[2]: # e.g. in mimikatz: ('cabinet', 'FCIAddFile', 11L) # extract by name here and by ordinal below - for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1]): + for name in capa.features.extractors.helpers.generate_symbols(info[0], info[1], include_dll=True): yield Import(name), addr dll = info[0] symbol = f"#{info[2]}" @@ -123,7 +123,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: else: continue - for name in capa.features.extractors.helpers.generate_symbols(dll, symbol): + for name in capa.features.extractors.helpers.generate_symbols(dll, symbol, include_dll=True): yield Import(name), addr for ea, info in capa.features.extractors.ida.helpers.get_file_externs().items(): diff --git a/capa/features/extractors/pefile.py b/capa/features/extractors/pefile.py index 55e0688ee..abd917c07 100644 --- a/capa/features/extractors/pefile.py +++ b/capa/features/extractors/pefile.py @@ -84,7 +84,7 @@ def extract_file_import_names(pe, **kwargs): except UnicodeDecodeError: continue - for name in capa.features.extractors.helpers.generate_symbols(modname, impname): + for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True): yield Import(name), AbsoluteVirtualAddress(imp.address) diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py index 204d8e693..52d56accd 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -73,7 +73,7 @@ def extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]] impname = "#" + impname[len("ord") :] addr = AbsoluteVirtualAddress(va) - for name in capa.features.extractors.helpers.generate_symbols(modname, impname): + for name in capa.features.extractors.helpers.generate_symbols(modname, impname, include_dll=True): yield Import(name), addr diff --git a/tests/fixtures.py b/tests/fixtures.py index 85d63b21b..40ff5a589 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -776,20 +776,17 @@ def parametrize(params, values, **kwargs): # forwarded export ("ea2876", "file", capa.features.file.Export("vresion.GetFileVersionInfoA"), True), # file/imports - # not extracting dll anymore - ("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), False), + ("mimikatz", "file", capa.features.file.Import("advapi32.CryptSetHashParam"), True), ("mimikatz", "file", capa.features.file.Import("CryptSetHashParam"), True), ("mimikatz", "file", capa.features.file.Import("kernel32.IsWow64Process"), True), - # not extracting dll anymore + ("mimikatz", "file", capa.features.file.Import("IsWow64Process"), True), ("mimikatz", "file", capa.features.file.Import("msvcrt.exit"), True), - ("mimikatz", "file", capa.features.file.Import("exit"), False), ("mimikatz", "file", capa.features.file.Import("cabinet.#11"), True), ("mimikatz", "file", capa.features.file.Import("#11"), False), ("mimikatz", "file", capa.features.file.Import("#nope"), False), ("mimikatz", "file", capa.features.file.Import("nope"), False), - # not extracting dll anymore - ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContextW"), False), - ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContext"), False), + ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContextW"), True), + ("mimikatz", "file", capa.features.file.Import("advapi32.CryptAcquireContext"), True), ("mimikatz", "file", capa.features.file.Import("CryptAcquireContextW"), True), ("mimikatz", "file", capa.features.file.Import("CryptAcquireContext"), True), # function/characteristic(loop)